579 files changed, 16315 insertions, 6684 deletions
diff --git a/.ci/premerge_advisor_upload.py b/.ci/premerge_advisor_upload.py
new file mode 100644
index 0000000..84214f8
--- /dev/null
+++ b/.ci/premerge_advisor_upload.py
@@ -0,0 +1,56 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Script for uploading results to the premerge advisor."""
+
+import argparse
+import os
+import platform
+import sys
+
+import requests
+
+import generate_test_report_lib
+
+PREMERGE_ADVISOR_URL = (
+    "http://premerge-advisor.premerge-advisor.svc.cluster.local:5000/upload"
+)
+
+
+def main(commit_sha, workflow_run_number, build_log_files):
+    junit_objects, ninja_logs = generate_test_report_lib.load_info_from_files(
+        build_log_files
+    )
+    test_failures = generate_test_report_lib.get_failures(junit_objects)
+    source = "pull_request" if "GITHUB_ACTIONS" in os.environ else "postcommit"
+    failure_info = {
+        "source_type": source,
+        "base_commit_sha": commit_sha,
+        "source_id": workflow_run_number,
+        "failures": [],
+    }
+    if test_failures:
+        for name, failure_message in test_failures:
+            failure_info["failures"].append({"name": name, "message": failure_message})
+    else:
+        ninja_failures = generate_test_report_lib.find_failure_in_ninja_logs(ninja_logs)
+        for name, failure_message in ninja_failures:
+            failure_info["failures"].append({"name": name, "message": failure_message})
+    requests.post(PREMERGE_ADVISOR_URL, json=failure_info)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("commit_sha", help="The base commit SHA for the test.")
+    parser.add_argument("workflow_run_number", help="The run number from GHA.")
+    parser.add_argument(
+        "build_log_files", help="Paths to JUnit report files and ninja logs.", nargs="*"
+    )
+    args = parser.parse_args()
+
+    # Skip uploading results on AArch64 for now because the premerge advisor
+    # service is not available on AWS currently.
+    if platform.machine() == "arm64":
+        sys.exit(0)
+
+    main(args.commit_sha, args.workflow_run_number, args.build_log_files)
diff --git a/.ci/utils.sh b/.ci/utils.sh
index 5d32968..9aefcf2 100644
--- a/.ci/utils.sh
+++ b/.ci/utils.sh
@@ -38,6 +38,12 @@ function at-exit {
       $retcode "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log \
       >> $GITHUB_STEP_SUMMARY
   fi
+
+  if [[ "$retcode" != "0" ]]; then
+    python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \
+      $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \
+      "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log
+  fi
 }
 trap at-exit EXIT
 
diff --git a/bolt/include/bolt/Passes/PLTCall.h b/bolt/include/bolt/Passes/PLTCall.h
index 09ef96e..9c46f5d 100644
--- a/bolt/include/bolt/Passes/PLTCall.h
+++ b/bolt/include/bolt/Passes/PLTCall.h
@@ -26,7 +26,7 @@ public:
   explicit PLTCall(const cl::opt<bool> &PrintPass)
       : BinaryFunctionPass(PrintPass) {}
 
-  const char *getName() const override { return "PLT call optimization"; }
+  const char *getName() const override { return "plt-call-optimization"; }
   bool shouldPrint(const BinaryFunction &BF) const override {
     return BinaryFunctionPass::shouldPrint(BF);
   }
diff --git a/bolt/include/bolt/Passes/TailDuplication.h b/bolt/include/bolt/Passes/TailDuplication.h
index a2fcab0..4a7ec08 100644
--- a/bolt/include/bolt/Passes/TailDuplication.h
+++ b/bolt/include/bolt/Passes/TailDuplication.h
@@ -143,7 +143,7 @@ public:
 
   explicit TailDuplication() : BinaryFunctionPass(false) {}
 
-  const char *getName() const override { return "tail duplication"; }
+  const char *getName() const override { return "tail-duplication"; }
 
   Error runOnFunctions(BinaryContext &BC) override;
 };
diff --git a/clang-tools-extra/clang-tidy/.clang-format b/clang-tools-extra/clang-tidy/.clang-format
index d18cf7c..5b50661 100644
--- a/clang-tools-extra/clang-tidy/.clang-format
+++ b/clang-tools-extra/clang-tidy/.clang-format
@@ -1,2 +1,3 @@
 BasedOnStyle: LLVM
 QualifierAlignment: Left
+LineEnding: LF
diff --git a/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp
index 4fc1b3b..76df992 100644
--- a/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/InvalidEnumDefaultInitializationCheck.cpp
@@ -69,14 +69,14 @@ public:
     return Visit(T->getElementType().getTypePtr());
   }
   bool VisitEnumType(const EnumType *T) {
-    if (isCompleteAndHasNoZeroValue(T->getOriginalDecl())) {
+    if (isCompleteAndHasNoZeroValue(T->getDecl())) {
       FoundEnum = T;
       return true;
     }
     return false;
   }
   bool VisitRecordType(const RecordType *T) {
-    const RecordDecl *RD = T->getOriginalDecl()->getDefinition();
+    const RecordDecl *RD = T->getDecl()->getDefinition();
     if (!RD || RD->isUnion())
       return false;
     auto VisitField = [this](const FieldDecl *F) {
@@ -139,7 +139,7 @@ void InvalidEnumDefaultInitializationCheck::check(
     if (!Finder.Visit(InitList->getArrayFiller()->getType().getTypePtr()))
       return;
     InitExpr = InitList;
-    Enum = Finder.FoundEnum->getOriginalDecl();
+    Enum = Finder.FoundEnum->getDecl();
   }
 
   if (!InitExpr || !Enum)
diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp
index d467684..3dd0a50 100644
--- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp
@@ -64,15 +64,17 @@ static unsigned getLength(const Expr *E,
   if (!E)
     return 0;
 
-  Expr::EvalResult Length;
   E = E->IgnoreImpCasts();
 
   if (const auto *LengthDRE = dyn_cast<DeclRefExpr>(E))
     if (const auto *LengthVD = dyn_cast<VarDecl>(LengthDRE->getDecl()))
       if (!isa<ParmVarDecl>(LengthVD))
-        if (const Expr *LengthInit = LengthVD->getInit())
+        if (const Expr *LengthInit = LengthVD->getInit();
+            LengthInit && !LengthInit->isValueDependent()) {
+          Expr::EvalResult Length;
           if (LengthInit->EvaluateAsInt(Length, *Result.Context))
             return Length.Val.getInt().getZExtValue();
+        }
 
   if (const auto *LengthIL = dyn_cast<IntegerLiteral>(E))
     return LengthIL->getValue().getZExtValue();
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
index 5de4e33..37d737a 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
@@ -190,7 +190,7 @@ struct InitializerInsertion {
 // Convenience utility to get a RecordDecl from a QualType.
 const RecordDecl *getCanonicalRecordDecl(const QualType &Type) {
   if (const auto *RT = Type->getAsCanonical<RecordType>())
-    return RT->getOriginalDecl();
+    return RT->getDecl();
   return nullptr;
 }
 
diff --git a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
index a038af4..6d5182d 100644
--- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
@@ -72,7 +72,7 @@ static bool isStdInitializerList(QualType Type) {
   }
   if (const auto *RT = Type->getAs<RecordType>()) {
     if (const auto *Specialization =
-            dyn_cast<ClassTemplateSpecializationDecl>(RT->getOriginalDecl()))
+            dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl()))
       return declIsStdInitializerList(Specialization->getSpecializedTemplate());
   }
   return false;
diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp
index 31524e4..81840cc9 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp
@@ -132,7 +132,7 @@ void UnusedUsingDeclsCheck::check(const MatchFinder::MatchResult &Result) {
     }
     if (const auto *ECD = dyn_cast<EnumConstantDecl>(Used)) {
       if (const auto *ET = ECD->getType()->getAsCanonical<EnumType>())
-        removeFromFoundDecls(ET->getOriginalDecl());
+        removeFromFoundDecls(ET->getDecl());
     }
   };
   // We rely on the fact that the clang AST is walked in order, usages are only
diff --git a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp
index aa1ee6d..a004480 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseScopedLockCheck.cpp
@@ -29,7 +29,7 @@ static bool isLockGuardDecl(const NamedDecl *Decl) {
 
 static bool isLockGuard(const QualType &Type) {
   if (const auto *Record = Type->getAsCanonical<RecordType>())
-    if (const RecordDecl *Decl = Record->getOriginalDecl())
+    if (const RecordDecl *Decl = Record->getDecl())
       return isLockGuardDecl(Decl);
 
   if (const auto *TemplateSpecType = Type->getAs<TemplateSpecializationType>())
diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
index 3e27d8f..d623ec4 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
@@ -77,7 +77,7 @@ public:
       if (T->getKeyword() != ElaboratedTypeKeyword::None ||
           TTL.getQualifierLoc())
         break;
-      if (visitUnqualName(T->getOriginalDecl()->getName()))
+      if (visitUnqualName(T->getDecl()->getName()))
         return false;
       break;
     }
diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp
index 29084f4..d1738f1 100644
--- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp
@@ -413,11 +413,10 @@ static bool areTypesCompatible(QualType ArgType, QualType ParamType,
 
   // Arithmetic types are interconvertible, except scoped enums.
   if (ParamType->isArithmeticType() && ArgType->isArithmeticType()) {
-    if ((ParamType->isEnumeralType() && ParamType->castAsCanonical<EnumType>()
-                                            ->getOriginalDecl()
-                                            ->isScoped()) ||
+    if ((ParamType->isEnumeralType() &&
+         ParamType->castAsCanonical<EnumType>()->getDecl()->isScoped()) ||
         (ArgType->isEnumeralType() &&
-         ArgType->castAsCanonical<EnumType>()->getOriginalDecl()->isScoped()))
+         ArgType->castAsCanonical<EnumType>()->getDecl()->isScoped()))
       return false;
 
     return true;
diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
index 70f6092..71d252f 100644
--- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
@@ -331,7 +331,7 @@ public:
   }
 
   bool VisitTagTypeLoc(const TagTypeLoc &Loc) {
-    Check->addUsage(Loc.getOriginalDecl(), Loc.getNameLoc(), SM);
+    Check->addUsage(Loc.getDecl(), Loc.getNameLoc(), SM);
     return true;
   }
 
diff --git a/clang-tools-extra/clangd/DumpAST.cpp b/clang-tools-extra/clangd/DumpAST.cpp
index 9a8d41d..cd409a2 100644
--- a/clang-tools-extra/clangd/DumpAST.cpp
+++ b/clang-tools-extra/clangd/DumpAST.cpp
@@ -261,7 +261,7 @@ class DumpVisitor : public RecursiveASTVisitor<DumpVisitor> {
       return TL.getType().getLocalQualifiers().getAsString(
           Ctx.getPrintingPolicy());
     if (const auto *TT = dyn_cast<TagType>(TL.getTypePtr()))
-      return getDetail(TT->getOriginalDecl());
+      return getDetail(TT->getDecl());
     if (const auto *DT = dyn_cast<DeducedType>(TL.getTypePtr()))
       if (DT->isDeduced())
         return DT->getDeducedType().getAsString(Ctx.getPrintingPolicy());
diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp
index 5d5388e..ce79f88 100644
--- a/clang-tools-extra/clangd/FindTarget.cpp
+++ b/clang-tools-extra/clangd/FindTarget.cpp
@@ -366,7 +366,7 @@ public:
       Visitor(TargetFinder &Outer, RelSet Flags) : Outer(Outer), Flags(Flags) {}
 
       void VisitTagType(const TagType *TT) {
-        Outer.add(cast<TagType>(TT)->getOriginalDecl(), Flags);
+        Outer.add(cast<TagType>(TT)->getDecl(), Flags);
       }
 
       void VisitUsingType(const UsingType *ET) {
@@ -861,7 +861,7 @@ refInTypeLoc(TypeLoc L, const HeuristicResolver *Resolver) {
       Refs.push_back(ReferenceLoc{L.getQualifierLoc(),
                                   L.getNameLoc(),
                                   /*IsDecl=*/false,
-                                  {L.getOriginalDecl()}});
+                                  {L.getDecl()}});
     }
 
     void VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc L) {
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index acc8e87..34369e1 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -179,7 +179,7 @@ HoverInfo::PrintedType printType(QualType QT, ASTContext &ASTCtx,
   if (!QT.isNull() && !QT.hasQualifiers() && PP.SuppressTagKeyword) {
     if (auto *TT = llvm::dyn_cast<TagType>(QT.getTypePtr());
         TT && TT->isCanonicalUnqualified())
-      OS << TT->getOriginalDecl()->getKindName() << " ";
+      OS << TT->getDecl()->getKindName() << " ";
   }
   QT.print(OS, PP);
 
diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp
index c27d960..3f3d7fb 100644
--- a/clang-tools-extra/clangd/IncludeFixer.cpp
+++ b/clang-tools-extra/clangd/IncludeFixer.cpp
@@ -173,7 +173,7 @@ std::vector<Fix> IncludeFixer::fix(DiagnosticsEngine::Level DiagLevel,
           // `enum x : int;' is not formally an incomplete type.
           // We may need a full definition anyway.
           if (auto * ET = llvm::dyn_cast<EnumType>(T))
-            if (!ET->getOriginalDecl()->getDefinition())
+            if (!ET->getDecl()->getDefinition())
               return fixIncompleteType(*T);
         }
       }
diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp
index d56b93e..23bd023 100644
--- a/clang-tools-extra/clangd/InlayHints.cpp
+++ b/clang-tools-extra/clangd/InlayHints.cpp
@@ -60,7 +60,7 @@ const NamedDecl *getDeclForType(const Type *T) {
   case Type::Enum:
   case Type::Record:
   case Type::InjectedClassName:
-    return cast<TagType>(T)->getOriginalDecl();
+    return cast<TagType>(T)->getDecl();
   case Type::TemplateSpecialization:
     return cast<TemplateSpecializationType>(T)
         ->getTemplateName()
diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp
index f65c74f..3f43362 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp
@@ -322,7 +322,7 @@ bool AddUsing::prepare(const Selection &Inputs) {
       if (!QualifierToRemove)
         break;
       SpelledNameRange = TL.getNameLoc();
-      MustInsertAfterLoc = TL.getOriginalDecl()->getBeginLoc();
+      MustInsertAfterLoc = TL.getDecl()->getBeginLoc();
       break;
     }
     case TypeLoc::Typedef: {
diff --git a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp
index 2c98417..769d73f 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/PopulateSwitch.cpp
@@ -116,7 +116,7 @@ bool PopulateSwitch::prepare(const Selection &Sel) {
   EnumT = Cond->getType()->getAsCanonical<EnumType>();
   if (!EnumT)
     return false;
-  EnumD = EnumT->getOriginalDecl()->getDefinitionOrSelf();
+  EnumD = EnumT->getDecl()->getDefinitionOrSelf();
   if (EnumD->isDependentType())
     return false;
 
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 33cc401..a94dd97 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -274,6 +274,10 @@ Changes in existing checks
   <clang-tidy/checks/bugprone/narrowing-conversions>` check by fixing
   false positive from analysis of a conditional expression in C.
 
+- Improved :doc:`bugprone-not-null-terminated-result
+  <clang-tidy/checks/bugprone/not-null-terminated-result>` check by fixing
+  a crash caused by certain value-dependent expressions.
+
 - Improved :doc:`bugprone-reserved-identifier
   <clang-tidy/checks/bugprone/reserved-identifier>` check by ignoring
   declarations and macros in system headers.
diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index 7bbdc8b..d444ddd 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -342,7 +342,7 @@ public:
   }
 
   bool VisitTagTypeLoc(TagTypeLoc TTL) {
-    reportType(TTL.getNameLoc(), TTL.getOriginalDecl());
+    reportType(TTL.getNameLoc(), TTL.getDecl());
     return true;
   }
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-value-dependent-crash.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-value-dependent-crash.cpp
new file mode 100644
index 0000000..5f361c3
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-value-dependent-crash.cpp
@@ -0,0 +1,23 @@
+// RUN: %check_clang_tidy %s bugprone-not-null-terminated-result %t -- \
+// RUN: -- -std=c++17 -I %S/Inputs/not-null-terminated-result
+
+// This test case reproduces the crash when the check tries to evaluate
+// a value-dependent expression using EvaluateAsInt() in
+// bugprone-not-null-terminated-result, where the src parameter of memcpy is
+// value-dependent, but the length is not.
+
+// expected-no-diagnostics
+
+#include "not-null-terminated-result-cxx.h"
+
+template<size_t N>
+class ValueDependentClass {
+public:
+  void copyData(char* Dst) {
+    const char* Src = reinterpret_cast<const char*>(this);
+    // The length parameter is arbitrary, but the crash is not reproduced if it is N.
+    memcpy(Dst, Src, 32);
+  }
+};
+
+template class ValueDependentClass<42>; // The template parameter value is arbitrary.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 79ff821..afe3d46 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -276,7 +276,7 @@ New Compiler Flags
 - New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
 - New option ``-fsanitize-debug-trap-reasons=`` added to control emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
 - New options for enabling allocation token instrumentation: ``-fsanitize=alloc-token``, ``-falloc-token-max=``, ``-fsanitize-alloc-token-fast-abi``, ``-fsanitize-alloc-token-extended``.
-
+- The ``-resource-dir`` option is now displayed in the list of options shown by ``--help``.
 
 Lanai Support
 ^^^^^^^^^^^^^^
@@ -413,6 +413,7 @@ Bug Fixes in This Version
   (#GH159080)
 - Fixed a failed assertion with empty filename arguments in ``__has_embed``. (#GH159898)
 - Fixed a failed assertion with empty filename in ``#embed`` directive. (#GH162951)
+- Fixed a crash triggered by unterminated ``__has_embed``. (#GH162953)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -426,7 +427,8 @@ Bug Fixes to Attribute Support
   (#GH141504) and on types returned from indirect calls (#GH142453).
 - Fixes some late parsed attributes, when applied to function definitions, not being parsed
   in function try blocks, and some situations where parsing of the function body
-  is skipped, such as error recovery and code completion. (#GH153551)
+  is skipped, such as error recovery, code completion, and msvc-compatible delayed
+  template parsing. (#GH153551)
 - Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
   ``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
 - Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075)
@@ -515,6 +517,7 @@ X86 Support
   driver.
 - Remove `[no-]evex512` feature request from intrinsics and builtins.
 - Change features `avx10.x-[256,512]` to `avx10.x`.
+- `-march=wildcatlake` is now supported.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 01f0b27..e82b16f 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2325,7 +2325,7 @@ are listed below.
    devirtualization and virtual constant propagation, for classes with
    :doc:`hidden LTO visibility <LTOVisibility>`. Requires ``-flto``.
 
-.. option:: -f[no]split-lto-unit
+.. option:: -f[no-]split-lto-unit
 
    Controls splitting the :doc:`LTO unit <LTOVisibility>` into regular LTO and
    :doc:`ThinLTO` portions, when compiling with -flto=thin. Defaults to false
@@ -2518,7 +2518,7 @@ are listed below.
 
 .. _funique_internal_linkage_names:
 
-.. option:: -f[no]-unique-internal-linkage-names
+.. option:: -f[no-]unique-internal-linkage-names
 
    Controls whether Clang emits a unique (best-effort) symbol name for internal
    linkage symbols.  When this option is set, compiler hashes the main source
@@ -2539,7 +2539,7 @@ are listed below.
      $ cd $P/bar && clang -c -funique-internal-linkage-names name_conflict.c
      $ cd $P && clang foo/name_conflict.o && bar/name_conflict.o
 
-.. option:: -f[no]-basic-block-address-map:
+.. option:: -f[no-]basic-block-address-map:
   Emits a ``SHT_LLVM_BB_ADDR_MAP`` section which includes address offsets for each
   basic block in the program, relative to the parent function address.
 
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h
index 0921604..e74bb72 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -770,7 +770,7 @@ public:
       // it will not be in the parent context:
       if (auto *TT = D->getFriendType()->getType()->getAs<TagType>())
         if (TT->isTagOwned())
-          Visit(TT->getOriginalDecl());
+          Visit(TT->getDecl());
     } else {
       Visit(D->getFriendDecl());
     }
diff --git a/clang/include/clang/AST/CanonicalType.h b/clang/include/clang/AST/CanonicalType.h
index b5a4e94e13..87bbd7b 100644
--- a/clang/include/clang/AST/CanonicalType.h
+++ b/clang/include/clang/AST/CanonicalType.h
@@ -551,18 +551,18 @@ struct CanProxyAdaptor<UnaryTransformType>
 
 template<>
 struct CanProxyAdaptor<TagType> : public CanProxyBase<TagType> {
-  LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(TagDecl *, getOriginalDecl)
+  LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(TagDecl *, getDecl)
 };
 
 template<>
 struct CanProxyAdaptor<RecordType> : public CanProxyBase<RecordType> {
-  LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(RecordDecl *, getOriginalDecl)
+  LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(RecordDecl *, getDecl)
   LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(bool, hasConstFields)
 };
 
 template<>
 struct CanProxyAdaptor<EnumType> : public CanProxyBase<EnumType> {
-  LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(EnumDecl *, getOriginalDecl)
+  LLVM_CLANG_CANPROXY_SIMPLE_ACCESSOR(EnumDecl *, getDecl)
 };
 
 template<>
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 898487b..dfa3bef 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -3832,7 +3832,7 @@ public:
 
 public:
   EnumDecl *getEnumDecl() const {
-    return EnumType->getType()->castAs<clang::EnumType>()->getOriginalDecl();
+    return EnumType->getType()->castAs<clang::EnumType>()->getDecl();
   }
 
   static UsingEnumDecl *Create(ASTContext &C, DeclContext *DC,
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index c246c4a..32b2b6b 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -1710,7 +1710,7 @@ DEF_TRAVERSE_DECL(FriendDecl, {
     // it will not be in the parent context:
     if (auto *TT = D->getFriendType()->getType()->getAs<TagType>();
         TT && TT->isTagOwned())
-      TRY_TO(TraverseDecl(TT->getOriginalDecl()));
+      TRY_TO(TraverseDecl(TT->getDecl()));
   } else {
     TRY_TO(TraverseDecl(D->getFriendDecl()));
   }
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index df106d5..7bd2441 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -27,7 +27,7 @@ inline CXXRecordDecl *Type::getAsCXXRecordDecl() const {
   const auto *TT = dyn_cast<TagType>(CanonicalType);
   if (!isa_and_present<RecordType, InjectedClassNameType>(TT))
     return nullptr;
-  auto *TD = TT->getOriginalDecl();
+  auto *TD = TT->getDecl();
   if (isa<RecordType>(TT) && !isa<CXXRecordDecl>(TD))
     return nullptr;
   return cast<CXXRecordDecl>(TD)->getDefinitionOrSelf();
@@ -35,41 +35,39 @@ inline CXXRecordDecl *Type::getAsCXXRecordDecl() const {
 
 inline CXXRecordDecl *Type::castAsCXXRecordDecl() const {
   const auto *TT = cast<TagType>(CanonicalType);
-  return cast<CXXRecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
+  return cast<CXXRecordDecl>(TT->getDecl())->getDefinitionOrSelf();
 }
 
 inline RecordDecl *Type::getAsRecordDecl() const {
   const auto *TT = dyn_cast<TagType>(CanonicalType);
   if (!isa_and_present<RecordType, InjectedClassNameType>(TT))
     return nullptr;
-  return cast<RecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
+  return cast<RecordDecl>(TT->getDecl())->getDefinitionOrSelf();
 }
 
 inline RecordDecl *Type::castAsRecordDecl() const {
   const auto *TT = cast<TagType>(CanonicalType);
-  return cast<RecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
+  return cast<RecordDecl>(TT->getDecl())->getDefinitionOrSelf();
 }
 
 inline EnumDecl *Type::getAsEnumDecl() const {
   if (const auto *TT = dyn_cast<EnumType>(CanonicalType))
-    return TT->getOriginalDecl()->getDefinitionOrSelf();
+    return TT->getDecl()->getDefinitionOrSelf();
   return nullptr;
 }
 
 inline EnumDecl *Type::castAsEnumDecl() const {
-  return cast<EnumType>(CanonicalType)
-      ->getOriginalDecl()
-      ->getDefinitionOrSelf();
+  return cast<EnumType>(CanonicalType)->getDecl()->getDefinitionOrSelf();
 }
 
 inline TagDecl *Type::getAsTagDecl() const {
   if (const auto *TT = dyn_cast<TagType>(CanonicalType))
-    return TT->getOriginalDecl()->getDefinitionOrSelf();
+    return TT->getDecl()->getDefinitionOrSelf();
   return nullptr;
 }
 
 inline TagDecl *Type::castAsTagDecl() const {
-  return cast<TagType>(CanonicalType)->getOriginalDecl()->getDefinitionOrSelf();
+  return cast<TagType>(CanonicalType)->getDecl()->getDefinitionOrSelf();
 }
 
 inline bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion() const {
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index 625cc77..5892566 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -6419,10 +6419,10 @@ protected:
           bool IsInjected, const Type *CanonicalType);
 
 public:
-  // FIXME: Temporarily renamed from `getDecl` in order to facilitate
-  // rebasing, due to change in behaviour. This should be renamed back
-  // to `getDecl` once the change is settled.
-  TagDecl *getOriginalDecl() const { return decl; }
+  TagDecl *getDecl() const { return decl; }
+  [[deprecated("Use getDecl instead")]] TagDecl *getOriginalDecl() const {
+    return decl;
+  }
 
   NestedNameSpecifier getQualifier() const;
 
@@ -6463,7 +6463,7 @@ struct TagTypeFoldingSetPlaceholder : public llvm::FoldingSetNode {
 
   void Profile(llvm::FoldingSetNodeID &ID) const {
     const TagType *T = getTagType();
-    Profile(ID, T->getKeyword(), T->getQualifier(), T->getOriginalDecl(),
+    Profile(ID, T->getKeyword(), T->getQualifier(), T->getDecl(),
             T->isTagOwned(), T->isInjected());
   }
 
@@ -6487,11 +6487,11 @@ class RecordType final : public TagType {
   using TagType::TagType;
 
 public:
-  // FIXME: Temporarily renamed from `getDecl` in order to facilitate
-  // rebasing, due to change in behaviour. This should be renamed back
-  // to `getDecl` once the change is settled.
-  RecordDecl *getOriginalDecl() const {
-    return reinterpret_cast<RecordDecl *>(TagType::getOriginalDecl());
+  RecordDecl *getDecl() const {
+    return reinterpret_cast<RecordDecl *>(TagType::getDecl());
+  }
+  [[deprecated("Use getDecl instead")]] RecordDecl *getOriginalDecl() const {
+    return getDecl();
   }
 
   /// Recursively check all fields in the record for const-ness. If any field
@@ -6507,11 +6507,11 @@ class EnumType final : public TagType {
   using TagType::TagType;
 
 public:
-  // FIXME: Temporarily renamed from `getDecl` in order to facilitate
-  // rebasing, due to change in behaviour. This should be renamed back
-  // to `getDecl` once the change is settled.
-  EnumDecl *getOriginalDecl() const {
-    return reinterpret_cast<EnumDecl *>(TagType::getOriginalDecl());
+  EnumDecl *getDecl() const {
+    return reinterpret_cast<EnumDecl *>(TagType::getDecl());
+  }
+  [[deprecated("Use getDecl instead")]] EnumDecl *getOriginalDecl() const {
+    return getDecl();
   }
 
   static bool classof(const Type *T) { return T->getTypeClass() == Enum; }
@@ -6542,11 +6542,11 @@ class InjectedClassNameType final : public TagType {
                         bool IsInjected, const Type *CanonicalType);
 
 public:
-  // FIXME: Temporarily renamed from `getDecl` in order to facilitate
-  // rebasing, due to change in behaviour. This should be renamed back
-  // to `getDecl` once the change is settled.
-  CXXRecordDecl *getOriginalDecl() const {
-    return reinterpret_cast<CXXRecordDecl *>(TagType::getOriginalDecl());
+  CXXRecordDecl *getDecl() const {
+    return reinterpret_cast<CXXRecordDecl *>(TagType::getDecl());
+  }
+  [[deprecated("Use getDecl instead")]] CXXRecordDecl *getOriginalDecl() const {
+    return getDecl();
   }
 
   static bool classof(const Type *T) {
@@ -8930,8 +8930,8 @@ inline bool Type::isIntegerType() const {
   if (const EnumType *ET = dyn_cast<EnumType>(CanonicalType)) {
     // Incomplete enum types are not treated as integer types.
     // FIXME: In C++, enum types are never integer types.
-    return IsEnumDeclComplete(ET->getOriginalDecl()) &&
-           !IsEnumDeclScoped(ET->getOriginalDecl());
+    return IsEnumDeclComplete(ET->getDecl()) &&
+           !IsEnumDeclScoped(ET->getDecl());
   }
   return isBitIntType();
 }
@@ -8989,7 +8989,7 @@ inline bool Type::isScalarType() const {
   if (const EnumType *ET = dyn_cast<EnumType>(CanonicalType))
     // Enums are scalar types, but only if they are defined.  Incomplete enums
     // are not treated as scalar types.
-    return IsEnumDeclComplete(ET->getOriginalDecl());
+    return IsEnumDeclComplete(ET->getDecl());
   return isa<PointerType>(CanonicalType) ||
          isa<BlockPointerType>(CanonicalType) ||
          isa<MemberPointerType>(CanonicalType) ||
@@ -9005,7 +9005,7 @@ inline bool Type::isIntegralOrEnumerationType() const {
   // Check for a complete enum type; incomplete enum types are not properly an
   // enumeration type in the sense required here.
   if (const auto *ET = dyn_cast<EnumType>(CanonicalType))
-    return IsEnumDeclComplete(ET->getOriginalDecl());
+    return IsEnumDeclComplete(ET->getDecl());
 
   return isBitIntType();
 }
diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h
index 3f14ee8..2cefaa9 100644
--- a/clang/include/clang/AST/TypeLoc.h
+++ b/clang/include/clang/AST/TypeLoc.h
@@ -793,7 +793,7 @@ struct TagTypeLocInfo {
 class TagTypeLoc : public ConcreteTypeLoc<UnqualTypeLoc, TagTypeLoc, TagType,
                                           TagTypeLocInfo> {
 public:
-  TagDecl *getOriginalDecl() const { return getTypePtr()->getOriginalDecl(); }
+  TagDecl *getDecl() const { return getTypePtr()->getDecl(); }
 
   /// True if the tag was defined in this type specifier.
   bool isDefinition() const;
@@ -854,9 +854,7 @@ class RecordTypeLoc : public InheritingConcreteTypeLoc<TagTypeLoc,
                                                        RecordTypeLoc,
                                                        RecordType> {
 public:
-  RecordDecl *getOriginalDecl() const {
-    return getTypePtr()->getOriginalDecl();
-  }
+  RecordDecl *getDecl() const { return getTypePtr()->getDecl(); }
 };
 
 /// Wrapper for source info for enum types.
@@ -864,7 +862,7 @@ class EnumTypeLoc : public InheritingConcreteTypeLoc<TagTypeLoc,
                                                      EnumTypeLoc,
                                                      EnumType> {
 public:
-  EnumDecl *getOriginalDecl() const { return getTypePtr()->getOriginalDecl(); }
+  EnumDecl *getDecl() const { return getTypePtr()->getDecl(); }
 };
 
 /// Wrapper for source info for injected class names of class
@@ -873,9 +871,7 @@ class InjectedClassNameTypeLoc
     : public InheritingConcreteTypeLoc<TagTypeLoc, InjectedClassNameTypeLoc,
                                        InjectedClassNameType> {
 public:
-  CXXRecordDecl *getOriginalDecl() const {
-    return getTypePtr()->getOriginalDecl();
-  }
+  CXXRecordDecl *getDecl() const { return getTypePtr()->getDecl(); }
 };
 
 /// Wrapper for template type parameters.
diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td
index 9dc85fb..03613d5 100644
--- a/clang/include/clang/AST/TypeProperties.td
+++ b/clang/include/clang/AST/TypeProperties.td
@@ -575,7 +575,7 @@ let Class = TagType in {
     let Conditional = [{ !IsCanonical }];
     let Read = [{ node->getQualifier() }];
   }
-  def : Property<"TD", TagDeclRef> { let Read = [{ node->getOriginalDecl() }]; }
+  def : Property<"TD", TagDeclRef> { let Read = [{ node->getDecl() }]; }
 }
 
 let Class = EnumType in {
diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
index 1ab6f11..c050fb7 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
@@ -1017,7 +1017,7 @@ private:
     // First, for any types that have a declaration, extract the declaration and
     // match on it.
     if (const auto *S = dyn_cast<TagType>(&Node)) {
-      return matchesDecl(S->getOriginalDecl(), Finder, Builder);
+      return matchesDecl(S->getDecl(), Finder, Builder);
     }
     if (const auto *S = dyn_cast<TemplateTypeParmType>(&Node)) {
       return matchesDecl(S->getDecl(), Finder, Builder);
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 006a453..279c0c7 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -125,7 +125,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
 
   let Features = "ssse3" in {
     def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
-    def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
     def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
     def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
     def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
@@ -133,6 +132,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
 
   let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
     def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
+    def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
   }
 }
 
@@ -610,7 +610,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
   def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
   def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
-  def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
   def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
   def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
   def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -649,6 +648,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
   def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
   def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
 
+  def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+
   def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
   def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
   def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
@@ -1347,7 +1348,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
   def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
-  def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
 }
 
 let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
@@ -1355,6 +1355,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
   def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
   def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
   def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
+
+  def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
 }
 
 let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 9e85008..5f70b51 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -243,6 +243,7 @@ ENUM_LANGOPT(HLSLVersion, HLSLLangStd, 16, HLSL_Unset, NotCompatible, "HLSL Vers
 LANGOPT(HLSLStrictAvailability, 1, 0, NotCompatible,
         "Strict availability diagnostic mode for HLSL built-in functions.")
 LANGOPT(HLSLSpvUseUnknownImageFormat, 1, 0, NotCompatible, "For storage images and texel buffers, sets the default format to 'Unknown' when not specified via the `vk::image_format` attribute. If this option is not used, the format is inferred from the resource's data type.")
+LANGOPT(HLSLSpvEnableMaximalReconvergence, 1, 0, NotCompatible, "Enables the MaximallyReconvergesKHR execution mode for this module. This ensures that control flow reconverges at well-defined merge points as defined by the Vulkan spec.")
 
 LANGOPT(CUDAIsDevice      , 1, 0, NotCompatible, "compiling for CUDA device")
 LANGOPT(CUDAHostDeviceConstexpr, 1, 1, NotCompatible, "treating unattributed constexpr functions as __host__ __device__")
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 41595ec..260a753 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -756,6 +756,15 @@ public:
   bool isTargetDevice() const {
     return OpenMPIsTargetDevice || CUDAIsDevice || SYCLIsDevice;
   }
+
+  /// Returns the most applicable C standard-compliant language version code.
+  /// If none could be determined, returns \ref std::nullopt.
+  std::optional<uint32_t> getCLangStd() const;
+
+  /// Returns the most applicable C++ standard-compliant language
+  /// version code.
+  /// If none could be determined, returns \ref std::nullopt.
+  std::optional<uint32_t> getCPlusPlusLangStd() const;
 };
 
 /// Floating point control options
diff --git a/clang/include/clang/Basic/LangStandard.h b/clang/include/clang/Basic/LangStandard.h
index 4941223..64645fe 100644
--- a/clang/include/clang/Basic/LangStandard.h
+++ b/clang/include/clang/Basic/LangStandard.h
@@ -70,8 +70,7 @@ enum LangFeatures {
 /// standard.
 struct LangStandard {
   enum Kind {
-#define LANGSTANDARD(id, name, lang, desc, features) \
-    lang_##id,
+#define LANGSTANDARD(id, name, lang, desc, features, version) lang_##id,
 #include "clang/Basic/LangStandards.def"
     lang_unspecified
   };
@@ -80,6 +79,7 @@ struct LangStandard {
   const char *Description;
   unsigned Flags;
   clang::Language Language;
+  std::optional<uint32_t> Version;
 
 public:
   /// getName - Get the name of this standard.
@@ -91,6 +91,9 @@ public:
   /// Get the language that this standard describes.
   clang::Language getLanguage() const { return Language; }
 
+  /// Get the version code for this language standard.
+  std::optional<uint32_t> getVersion() const { return Version; }
+
   /// Language supports '//' comments.
   bool hasLineComments() const { return Flags & LineComment; }
 
diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def
index 244692a..4edc935 100644
--- a/clang/include/clang/Basic/LangStandards.def
+++ b/clang/include/clang/Basic/LangStandards.def
@@ -10,7 +10,7 @@
 #error "LANGSTANDARD must be defined before including this file"
 #endif
 
-/// LANGSTANDARD(IDENT, NAME, LANG, DESC, FEATURES)
+/// LANGSTANDARD(IDENT, NAME, LANG, DESC, FEATURES, VERSION)
 ///
 /// \param IDENT - The name of the standard as a C++ identifier.
 /// \param NAME - The name of the standard.
@@ -18,6 +18,8 @@
 /// \param DESC - A short description of the standard.
 /// \param FEATURES - The standard features as flags, these are enums from the
 /// clang::frontend namespace, which is assumed to be available.
+/// \param VERSION - The official version code for this standard.
+/// Has value 'std::nullopt' if no official version exists.
 
 /// LANGSTANDARD_ALIAS(IDENT, ALIAS)
 /// \param IDENT - The name of the standard as a C++ identifier.
@@ -36,186 +38,188 @@
 
 // C89-ish modes.
 LANGSTANDARD(c89, "c89",
-             C, "ISO C 1990", 0)
+             C, "ISO C 1990", 0, std::nullopt)
 LANGSTANDARD_ALIAS(c89, "c90")
 LANGSTANDARD_ALIAS(c89, "iso9899:1990")
 
 LANGSTANDARD(c94, "iso9899:199409",
              C, "ISO C 1990 with amendment 1",
-             Digraphs)
+             Digraphs, 199409)
 
 LANGSTANDARD(gnu89, "gnu89",
              C, "ISO C 1990 with GNU extensions",
-             LineComment | Digraphs | GNUMode)
+             LineComment | Digraphs | GNUMode, std::nullopt)
 LANGSTANDARD_ALIAS(gnu89, "gnu90")
 
 // C99-ish modes
 LANGSTANDARD(c99, "c99",
              C, "ISO C 1999",
-             LineComment | C99 | Digraphs | HexFloat)
+             LineComment | C99 | Digraphs | HexFloat, 199901)
 LANGSTANDARD_ALIAS(c99, "iso9899:1999")
 LANGSTANDARD_ALIAS_DEPR(c99, "c9x")
 LANGSTANDARD_ALIAS_DEPR(c99, "iso9899:199x")
 
 LANGSTANDARD(gnu99, "gnu99",
              C, "ISO C 1999 with GNU extensions",
-             LineComment | C99 | Digraphs | GNUMode | HexFloat)
+             LineComment | C99 | Digraphs | GNUMode | HexFloat, 199901)
 LANGSTANDARD_ALIAS_DEPR(gnu99, "gnu9x")
 
 // C11 modes
 LANGSTANDARD(c11, "c11",
              C, "ISO C 2011",
-             LineComment | C99 | C11 | Digraphs | HexFloat)
+             LineComment | C99 | C11 | Digraphs | HexFloat, 201112)
 LANGSTANDARD_ALIAS(c11, "iso9899:2011")
 LANGSTANDARD_ALIAS_DEPR(c11, "c1x")
 LANGSTANDARD_ALIAS_DEPR(c11, "iso9899:201x")
 
 LANGSTANDARD(gnu11, "gnu11",
              C, "ISO C 2011 with GNU extensions",
-             LineComment | C99 | C11 | Digraphs | GNUMode | HexFloat)
+             LineComment | C99 | C11 | Digraphs | GNUMode | HexFloat, 201112)
 LANGSTANDARD_ALIAS_DEPR(gnu11, "gnu1x")
 
 // C17 modes
 LANGSTANDARD(c17, "c17",
              C, "ISO C 2017",
-             LineComment | C99 | C11 | C17 | Digraphs | HexFloat)
+             LineComment | C99 | C11 | C17 | Digraphs | HexFloat, 201710)
 LANGSTANDARD_ALIAS(c17, "iso9899:2017")
 LANGSTANDARD_ALIAS(c17, "c18")
 LANGSTANDARD_ALIAS(c17, "iso9899:2018")
 LANGSTANDARD(gnu17, "gnu17",
              C, "ISO C 2017 with GNU extensions",
-             LineComment | C99 | C11 | C17 | Digraphs | GNUMode | HexFloat)
+             LineComment | C99 | C11 | C17 | Digraphs | GNUMode | HexFloat, 201710)
 LANGSTANDARD_ALIAS(gnu17, "gnu18")
 
 // C23 modes
 LANGSTANDARD(c23, "c23",
              C, "ISO C 2023",
-             LineComment | C99 | C11 | C17 | C23 | Digraphs | HexFloat)
+             LineComment | C99 | C11 | C17 | C23 | Digraphs | HexFloat, 202311)
 LANGSTANDARD_ALIAS(c23, "iso9899:2024")
 LANGSTANDARD_ALIAS_DEPR(c23, "c2x")
 LANGSTANDARD(gnu23, "gnu23",
              C, "ISO C 2023 with GNU extensions",
-             LineComment | C99 | C11 | C17 | C23 | Digraphs | GNUMode | HexFloat)
+             LineComment | C99 | C11 | C17 | C23 | Digraphs | GNUMode | HexFloat, 202311)
 LANGSTANDARD_ALIAS_DEPR(gnu23, "gnu2x")
 
 // C2y modes
+// FIXME: Use correct version code for C2y once published.
 LANGSTANDARD(c2y, "c2y",
              C, "Working Draft for ISO C2y",
-             LineComment | C99 | C11 | C17 | C23 | C2y | Digraphs | HexFloat)
+             LineComment | C99 | C11 | C17 | C23 | C2y | Digraphs | HexFloat, 202400)
 LANGSTANDARD(gnu2y, "gnu2y",
              C, "Working Draft for ISO C2y with GNU extensions",
-             LineComment | C99 | C11 | C17 | C23 | C2y | Digraphs | GNUMode | HexFloat)
+             LineComment | C99 | C11 | C17 | C23 | C2y | Digraphs | GNUMode | HexFloat, 202400)
 // TODO: Add the iso9899:202y alias once ISO publishes the standard.
 
 // C++ modes
 LANGSTANDARD(cxx98, "c++98",
              CXX, "ISO C++ 1998 with amendments",
-             LineComment | CPlusPlus | Digraphs)
+             LineComment | CPlusPlus | Digraphs, 199711)
 LANGSTANDARD_ALIAS(cxx98, "c++03")
 
 LANGSTANDARD(gnucxx98, "gnu++98",
              CXX, "ISO C++ 1998 with amendments and GNU extensions",
-             LineComment | CPlusPlus | Digraphs | GNUMode)
+             LineComment | CPlusPlus | Digraphs | GNUMode, 199711)
 LANGSTANDARD_ALIAS(gnucxx98, "gnu++03")
 
 LANGSTANDARD(cxx11, "c++11",
              CXX, "ISO C++ 2011 with amendments",
-             LineComment | CPlusPlus | CPlusPlus11 | Digraphs)
+             LineComment | CPlusPlus | CPlusPlus11 | Digraphs, 201103)
 LANGSTANDARD_ALIAS_DEPR(cxx11, "c++0x")
 
 LANGSTANDARD(gnucxx11, "gnu++11", CXX,
              "ISO C++ 2011 with amendments and GNU extensions",
-             LineComment | CPlusPlus | CPlusPlus11 | Digraphs | GNUMode)
+             LineComment | CPlusPlus | CPlusPlus11 | Digraphs | GNUMode, 201103)
 LANGSTANDARD_ALIAS_DEPR(gnucxx11, "gnu++0x")
 
 LANGSTANDARD(cxx14, "c++14",
              CXX, "ISO C++ 2014 with amendments",
-             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | Digraphs)
+             LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | Digraphs, 201402)
 LANGSTANDARD_ALIAS_DEPR(cxx14, "c++1y")
 
 LANGSTANDARD(gnucxx14, "gnu++14",
              CXX, "ISO C++ 2014 with amendments and GNU extensions",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | Digraphs |
-             GNUMode)
+             GNUMode, 201402)
 LANGSTANDARD_ALIAS_DEPR(gnucxx14, "gnu++1y")
 
 LANGSTANDARD(cxx17, "c++17",
              CXX, "ISO C++ 2017 with amendments",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             Digraphs | HexFloat)
+             Digraphs | HexFloat, 201703)
 LANGSTANDARD_ALIAS_DEPR(cxx17, "c++1z")
 
 LANGSTANDARD(gnucxx17, "gnu++17",
              CXX, "ISO C++ 2017 with amendments and GNU extensions",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             Digraphs | HexFloat | GNUMode)
+             Digraphs | HexFloat | GNUMode, 201703)
 LANGSTANDARD_ALIAS_DEPR(gnucxx17, "gnu++1z")
 
 LANGSTANDARD(cxx20, "c++20",
              CXX, "ISO C++ 2020 DIS",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             CPlusPlus20 | Digraphs | HexFloat)
+             CPlusPlus20 | Digraphs | HexFloat, 202002)
 LANGSTANDARD_ALIAS_DEPR(cxx20, "c++2a")
 
 LANGSTANDARD(gnucxx20, "gnu++20",
              CXX, "ISO C++ 2020 DIS with GNU extensions",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             CPlusPlus20 | Digraphs | HexFloat | GNUMode)
+             CPlusPlus20 | Digraphs | HexFloat | GNUMode, 202002)
 LANGSTANDARD_ALIAS_DEPR(gnucxx20, "gnu++2a")
 
 LANGSTANDARD(cxx23, "c++23",
              CXX, "ISO C++ 2023 DIS",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             CPlusPlus20 | CPlusPlus23 | Digraphs | HexFloat)
+             CPlusPlus20 | CPlusPlus23 | Digraphs | HexFloat, 202302)
 LANGSTANDARD_ALIAS_DEPR(cxx23, "c++2b")
 
 LANGSTANDARD(gnucxx23, "gnu++23",
              CXX, "ISO C++ 2023 DIS with GNU extensions",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             CPlusPlus20 | CPlusPlus23 | Digraphs | HexFloat | GNUMode)
+             CPlusPlus20 | CPlusPlus23 | Digraphs | HexFloat | GNUMode, 202302)
 LANGSTANDARD_ALIAS_DEPR(gnucxx23, "gnu++2b")
 
+// FIXME: Use correct version code for C++26 once published.
 LANGSTANDARD(cxx26, "c++2c",
              CXX, "Working draft for C++2c",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             CPlusPlus20 | CPlusPlus23 | CPlusPlus26 | Digraphs | HexFloat)
+             CPlusPlus20 | CPlusPlus23 | CPlusPlus26 | Digraphs | HexFloat, 202400)
 LANGSTANDARD_ALIAS(cxx26, "c++26")
 
 LANGSTANDARD(gnucxx26, "gnu++2c",
              CXX, "Working draft for C++2c with GNU extensions",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             CPlusPlus20 | CPlusPlus23 | CPlusPlus26 | Digraphs | HexFloat | GNUMode)
+             CPlusPlus20 | CPlusPlus23 | CPlusPlus26 | Digraphs | HexFloat | GNUMode, 202400)
 LANGSTANDARD_ALIAS(gnucxx26, "gnu++26")
 
 // OpenCL
 LANGSTANDARD(opencl10, "cl1.0",
              OpenCL, "OpenCL 1.0",
-             LineComment | C99 | Digraphs | HexFloat | OpenCL)
+             LineComment | C99 | Digraphs | HexFloat | OpenCL, std::nullopt)
 LANGSTANDARD_ALIAS_DEPR(opencl10, "cl")
 
 LANGSTANDARD(opencl11, "cl1.1",
              OpenCL, "OpenCL 1.1",
-             LineComment | C99 | Digraphs | HexFloat | OpenCL)
+             LineComment | C99 | Digraphs | HexFloat | OpenCL, std::nullopt)
 LANGSTANDARD(opencl12, "cl1.2",
              OpenCL, "OpenCL 1.2",
-             LineComment | C99 | Digraphs | HexFloat | OpenCL)
+             LineComment | C99 | Digraphs | HexFloat | OpenCL, std::nullopt)
 LANGSTANDARD(opencl20, "cl2.0",
              OpenCL, "OpenCL 2.0",
-             LineComment | C99 | Digraphs | HexFloat | OpenCL)
+             LineComment | C99 | Digraphs | HexFloat | OpenCL, std::nullopt)
 LANGSTANDARD(opencl30, "cl3.0",
              OpenCL, "OpenCL 3.0",
-             LineComment | C99 | Digraphs | HexFloat | OpenCL)
+             LineComment | C99 | Digraphs | HexFloat | OpenCL, std::nullopt)
 
 LANGSTANDARD(openclcpp10, "clc++1.0",
              OpenCL, "C++ for OpenCL 1.0",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             Digraphs | HexFloat | OpenCL)
+             Digraphs | HexFloat | OpenCL, std::nullopt)
 LANGSTANDARD_ALIAS(openclcpp10, "clc++")
 
 LANGSTANDARD(openclcpp2021, "clc++2021",
              OpenCL, "C++ for OpenCL 2021",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
-             Digraphs | HexFloat | OpenCL)
+             Digraphs | HexFloat | OpenCL, std::nullopt)
 
 LANGSTANDARD_ALIAS_DEPR(opencl10, "CL")
 LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1")
@@ -229,35 +233,35 @@ LANGSTANDARD_ALIAS_DEPR(openclcpp2021, "CLC++2021")
 // HLSL
 LANGSTANDARD(hlsl, "hlsl",
              HLSL, "High Level Shader Language",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl2015, "hlsl2015",
              HLSL, "High Level Shader Language 2015",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl2016, "hlsl2016",
              HLSL, "High Level Shader Language 2016",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl2017, "hlsl2017",
              HLSL, "High Level Shader Language 2017",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl2018, "hlsl2018",
              HLSL, "High Level Shader Language 2018",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl2021, "hlsl2021",
              HLSL, "High Level Shader Language 2021",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl202x, "hlsl202x",
              HLSL, "High Level Shader Language 202x",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 LANGSTANDARD(hlsl202y, "hlsl202y",
              HLSL, "High Level Shader Language 202y",
-             LineComment | HLSL | CPlusPlus | CPlusPlus11)
+             LineComment | HLSL | CPlusPlus | CPlusPlus11, std::nullopt)
 
 
 #undef LANGSTANDARD
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 611b68e..7ae153d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6122,7 +6122,7 @@ def rewrite_legacy_objc : Flag<["-"], "rewrite-legacy-objc">,
 def rdynamic : Flag<["-"], "rdynamic">, Group<Link_Group>,
   Visibility<[ClangOption, FlangOption]>;
 def resource_dir : Separate<["-"], "resource-dir">,
-  Flags<[NoXarchOption, HelpHidden]>,
+  Flags<[NoXarchOption]>,
   Visibility<[ClangOption, CC1Option, CLOption, DXCOption, FlangOption, FC1Option]>,
   HelpText<"The directory which holds the compiler resource files">,
   MarshallingInfoString<HeaderSearchOpts<"ResourceDir">>;
@@ -6211,11 +6211,12 @@ def static : Flag<["-", "--"], "static">, Group<Link_Group>,
   Flags<[NoArgumentUnused]>;
 def std_default_EQ : Joined<["-"], "std-default=">;
 def std_EQ : Joined<["-", "--"], "std=">,
-  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
-  Group<CompileOnly_Group>, HelpText<"Language standard to compile for">,
-  ValuesCode<[{
+             Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
+             Group<CompileOnly_Group>,
+             HelpText<"Language standard to compile for">,
+             ValuesCode<[{
     static constexpr const char VALUES_CODE [] =
-    #define LANGSTANDARD(id, name, lang, desc, features) name ","
+    #define LANGSTANDARD(id, name, lang, desc, features, version) name ","
     #define LANGSTANDARD_ALIAS(id, alias) alias ","
     #include "clang/Basic/LangStandards.def"
     ;
@@ -9600,6 +9601,15 @@ def fhlsl_spv_use_unknown_image_format
                "from the resource's data type.">,
       MarshallingInfoFlag<LangOpts<"HLSLSpvUseUnknownImageFormat">>;
 
+def fhlsl_spv_enable_maximal_reconvergence
+    : Flag<["-"], "fspv-enable-maximal-reconvergence">,
+      Group<dxc_Group>,
+      Visibility<[CC1Option, DXCOption]>,
+      HelpText<"Enables the MaximallyReconvergesKHR execution mode for this "
+               "module. This ensures that control flow reconverges at "
+               "well-defined merge points as defined by the Vulkan spec.">,
+      MarshallingInfoFlag<LangOpts<"HLSLSpvEnableMaximalReconvergence">>;
+
 def no_wasm_opt : Flag<["--"], "no-wasm-opt">,
   Group<m_Group>,
   HelpText<"Disable the wasm-opt optimizer">,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h
index e3cf1ba..1e87b47 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h
@@ -140,8 +140,8 @@ public:
       // It might be great to reuse FrontendOptions::getInputKindForExtension()
       // but for now it doesn't discriminate between code and header files.
       return llvm::StringSwitch<bool>(SM.getFilename(SL).rsplit('.').second)
-          .Cases("c", "m", "mm", "C", "cc", "cp", true)
-          .Cases("cpp", "CPP", "c++", "cxx", "cppm", true)
+          .Cases({"c", "m", "mm", "C", "cc", "cp"}, true)
+          .Cases({"cpp", "CPP", "c++", "cxx", "cppm"}, true)
           .Default(false);
     }
 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 3603e9cd..e403b3e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -563,8 +563,7 @@ comments::FullComment *ASTContext::getCommentForDecl(
       // does not have one of its own.
       QualType QT = TD->getUnderlyingType();
       if (const auto *TT = QT->getAs<TagType>())
-        if (comments::FullComment *FC =
-                getCommentForDecl(TT->getOriginalDecl(), PP))
+        if (comments::FullComment *FC = getCommentForDecl(TT->getDecl(), PP))
           return cloneFullComment(FC, D);
     }
     else if (const auto *IC = dyn_cast<ObjCInterfaceDecl>(D)) {
@@ -2387,7 +2386,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   case Type::Record:
   case Type::Enum: {
     const auto *TT = cast<TagType>(T);
-    const TagDecl *TD = TT->getOriginalDecl()->getDefinitionOrSelf();
+    const TagDecl *TD = TT->getDecl()->getDefinitionOrSelf();
 
     if (TD->isInvalidDecl()) {
       Width = 8;
@@ -2531,7 +2530,7 @@ unsigned ASTContext::getTypeUnadjustedAlign(const Type *T) const {
 
   unsigned UnadjustedAlign;
   if (const auto *RT = T->getAsCanonical<RecordType>()) {
-    const ASTRecordLayout &Layout = getASTRecordLayout(RT->getOriginalDecl());
+    const ASTRecordLayout &Layout = getASTRecordLayout(RT->getDecl());
     UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
   } else if (const auto *ObjCI = T->getAsCanonical<ObjCInterfaceType>()) {
     const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
@@ -3469,7 +3468,7 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx,
     llvm_unreachable("should never get here");
   }
   case Type::Record: {
-    const RecordDecl *RD = T->castAsCanonical<RecordType>()->getOriginalDecl();
+    const RecordDecl *RD = T->castAsCanonical<RecordType>()->getDecl();
     const IdentifierInfo *II = RD->getIdentifier();
 
     // In C++, an immediate typedef of an anonymous struct or union
@@ -5377,7 +5376,7 @@ TagType *ASTContext::getTagTypeInternal(ElaboratedTypeKeyword Keyword,
   }();
   assert(T->getKeyword() == Keyword);
   assert(T->getQualifier() == Qualifier);
-  assert(T->getOriginalDecl() == TD);
+  assert(T->getDecl() == TD);
   assert(T->isInjected() == IsInjected);
   assert(T->isTagOwned() == OwnsTag);
   assert((T->isCanonicalUnqualified()
@@ -8271,7 +8270,7 @@ Qualifiers::ObjCLifetime ASTContext::getInnerObjCOwnership(QualType T) const {
 static const Type *getIntegerTypeForEnum(const EnumType *ET) {
   // Incomplete enum types are not treated as integer types.
   // FIXME: In C++, enum types are never integer types.
-  const EnumDecl *ED = ET->getOriginalDecl()->getDefinitionOrSelf();
+  const EnumDecl *ED = ET->getDecl()->getDefinitionOrSelf();
   if (ED->isComplete() && !ED->isScoped())
     return ED->getIntegerType().getTypePtr();
   return nullptr;
@@ -9189,7 +9188,7 @@ static void EncodeBitField(const ASTContext *Ctx, std::string& S,
     S += llvm::utostr(Offset);
 
     if (const auto *ET = T->getAsCanonical<EnumType>())
-      S += ObjCEncodingForEnumDecl(Ctx, ET->getOriginalDecl());
+      S += ObjCEncodingForEnumDecl(Ctx, ET->getDecl());
     else {
       const auto *BT = T->castAs<BuiltinType>();
       S += getObjCEncodingForPrimitiveType(Ctx, BT);
@@ -9246,7 +9245,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
     if (const auto *BT = dyn_cast<BuiltinType>(CT))
       S += getObjCEncodingForPrimitiveType(this, BT);
     else
-      S += ObjCEncodingForEnumDecl(this, cast<EnumType>(CT)->getOriginalDecl());
+      S += ObjCEncodingForEnumDecl(this, cast<EnumType>(CT)->getDecl());
     return;
 
   case Type::Complex:
@@ -9314,7 +9313,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
         return;
       }
     } else if (const auto *RTy = PointeeTy->getAsCanonical<RecordType>()) {
-      const IdentifierInfo *II = RTy->getOriginalDecl()->getIdentifier();
+      const IdentifierInfo *II = RTy->getDecl()->getIdentifier();
       // GCC binary compat: Need to convert "struct objc_class *" to "#".
       if (II == &Idents.get("objc_class")) {
         S += '#';
@@ -9386,7 +9385,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
     return;
 
   case Type::Record: {
-    RecordDecl *RDecl = cast<RecordType>(CT)->getOriginalDecl();
+    RecordDecl *RDecl = cast<RecordType>(CT)->getDecl();
     S += RDecl->isUnion() ? '(' : '{';
     // Anonymous structures print as '?'
     if (const IdentifierInfo *II = RDecl->getIdentifier()) {
@@ -11290,7 +11289,7 @@ QualType ASTContext::mergeTransparentUnionType(QualType T, QualType SubType,
                                                bool OfBlockPointer,
                                                bool Unqualified) {
   if (const RecordType *UT = T->getAsUnionType()) {
-    RecordDecl *UD = UT->getOriginalDecl()->getMostRecentDecl();
+    RecordDecl *UD = UT->getDecl()->getMostRecentDecl();
     if (UD->hasAttr<TransparentUnionAttr>()) {
       for (const auto *I : UD->fields()) {
         QualType ET = I->getType().getUnqualifiedType();
@@ -11560,7 +11559,7 @@ static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET,
   // Compatibility is based on the underlying type, not the promotion
   // type.
   QualType underlyingType =
-      ET->getOriginalDecl()->getDefinitionOrSelf()->getIntegerType();
+      ET->getDecl()->getDefinitionOrSelf()->getIntegerType();
   if (underlyingType.isNull())
     return {};
   if (Context.hasSameType(underlyingType, other))
@@ -14160,11 +14159,10 @@ static QualType getCommonNonSugarTypeNode(const ASTContext &Ctx, const Type *X,
   case Type::Record:
   case Type::InjectedClassName: {
     const auto *TX = cast<TagType>(X), *TY = cast<TagType>(Y);
-    return Ctx.getTagType(
-        ::getCommonTypeKeyword(TX, TY, /*IsSame=*/false),
-        ::getCommonQualifier(Ctx, TX, TY, /*IsSame=*/false),
-        ::getCommonDeclChecked(TX->getOriginalDecl(), TY->getOriginalDecl()),
-        /*OwnedTag=*/false);
+    return Ctx.getTagType(::getCommonTypeKeyword(TX, TY, /*IsSame=*/false),
+                          ::getCommonQualifier(Ctx, TX, TY, /*IsSame=*/false),
+                          ::getCommonDeclChecked(TX->getDecl(), TY->getDecl()),
+                          /*OwnedTag=*/false);
   }
   case Type::TemplateSpecialization: {
     const auto *TX = cast<TemplateSpecializationType>(X),
diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp
index d7fd411..b8023cb 100644
--- a/clang/lib/AST/ASTDiagnostic.cpp
+++ b/clang/lib/AST/ASTDiagnostic.cpp
@@ -196,8 +196,7 @@ break; \
     // Don't desugar through the primary typedef of an anonymous type.
     if (const TagType *UTT = Underlying->getAs<TagType>())
       if (const TypedefType *QTT = dyn_cast<TypedefType>(QT))
-        if (UTT->getOriginalDecl()->getTypedefNameForAnonDecl() ==
-            QTT->getDecl())
+        if (UTT->getDecl()->getTypedefNameForAnonDecl() == QTT->getDecl())
           break;
 
     // Record that we actually looked through an opaque type here.
@@ -1147,14 +1146,11 @@ class TemplateDiff {
     if (const auto* SubstType = Ty->getAs<SubstTemplateTypeParmType>())
       Ty = SubstType->getReplacementType();
 
-    const RecordType *RT = Ty->getAs<RecordType>();
-
+    const auto *RT = Ty->getAs<RecordType>();
     if (!RT)
       return nullptr;
 
-    const ClassTemplateSpecializationDecl *CTSD =
-        dyn_cast<ClassTemplateSpecializationDecl>(RT->getOriginalDecl());
-
+    const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl());
     if (!CTSD)
       return nullptr;
 
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index f43fa8c..bf51c3e 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -1740,7 +1740,7 @@ ExpectedType ASTNodeImporter::VisitDeducedTemplateSpecializationType(
 }
 
 ExpectedType ASTNodeImporter::VisitTagType(const TagType *T) {
-  TagDecl *DeclForType = T->getOriginalDecl();
+  TagDecl *DeclForType = T->getDecl();
   Expected<TagDecl *> ToDeclOrErr = import(DeclForType);
   if (!ToDeclOrErr)
     return ToDeclOrErr.takeError();
@@ -2155,7 +2155,7 @@ Error ASTNodeImporter::ImportDeclParts(
       const Type *LeafT =
           getLeafPointeeType(P->getType().getCanonicalType().getTypePtr());
       auto *RT = dyn_cast<RecordType>(LeafT);
-      if (RT && RT->getOriginalDecl() == D) {
+      if (RT && RT->getDecl() == D) {
         Importer.FromDiag(D->getLocation(), diag::err_unsupported_ast_node)
             << D->getDeclKindName();
         return make_error<ASTImportError>(ASTImportError::UnsupportedConstruct);
@@ -2408,8 +2408,8 @@ Error ASTNodeImporter::ImportFieldDeclDefinition(const FieldDecl *From,
     const RecordType *RecordTo = ToType->getAs<RecordType>();
 
     if (RecordFrom && RecordTo) {
-      FromRecordDecl = RecordFrom->getOriginalDecl();
-      ToRecordDecl = RecordTo->getOriginalDecl();
+      FromRecordDecl = RecordFrom->getDecl();
+      ToRecordDecl = RecordTo->getDecl();
     }
   }
 
@@ -3205,7 +3205,7 @@ ExpectedDecl ASTNodeImporter::VisitEnumDecl(EnumDecl *D) {
 
       if (auto *Typedef = dyn_cast<TypedefNameDecl>(FoundDecl)) {
         if (const auto *Tag = Typedef->getUnderlyingType()->getAs<TagType>())
-          FoundDecl = Tag->getOriginalDecl();
+          FoundDecl = Tag->getDecl();
       }
 
       if (auto *FoundEnum = dyn_cast<EnumDecl>(FoundDecl)) {
@@ -3336,7 +3336,7 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
       Decl *Found = FoundDecl;
       if (auto *Typedef = dyn_cast<TypedefNameDecl>(Found)) {
         if (const auto *Tag = Typedef->getUnderlyingType()->getAs<TagType>())
-          Found = Tag->getOriginalDecl();
+          Found = Tag->getDecl();
       }
 
       if (auto *FoundRecord = dyn_cast<RecordDecl>(Found)) {
@@ -3757,12 +3757,11 @@ public:
   }
 
   std::optional<bool> VisitTagType(const TagType *T) {
-    if (auto *Spec =
-            dyn_cast<ClassTemplateSpecializationDecl>(T->getOriginalDecl()))
+    if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(T->getDecl()))
       for (const auto &Arg : Spec->getTemplateArgs().asArray())
         if (checkTemplateArgument(Arg))
           return true;
-    return isAncestorDeclContextOf(ParentDC, T->getOriginalDecl());
+    return isAncestorDeclContextOf(ParentDC, T->getDecl());
   }
 
   std::optional<bool> VisitPointerType(const PointerType *T) {
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index b17cd6f..da64c92 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -878,10 +878,10 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
       // Treat the enumeration as its underlying type and use the builtin type
       // class comparison.
       if (T1->getTypeClass() == Type::Enum) {
-        T1 = cast<EnumType>(T1)->getOriginalDecl()->getIntegerType();
+        T1 = cast<EnumType>(T1)->getDecl()->getIntegerType();
         assert(T2->isBuiltinType() && !T1.isNull()); // Sanity check
       } else if (T2->getTypeClass() == Type::Enum) {
-        T2 = cast<EnumType>(T2)->getOriginalDecl()->getIntegerType();
+        T2 = cast<EnumType>(T2)->getDecl()->getIntegerType();
         assert(T1->isBuiltinType() && !T2.isNull()); // Sanity check
       }
       TC = Type::Builtin;
@@ -1300,8 +1300,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
     if (!IsStructurallyEquivalent(Context, TT1->getQualifier(),
                                   TT2->getQualifier()))
       return false;
-    if (!IsStructurallyEquivalent(Context, TT1->getOriginalDecl(),
-                                  TT2->getOriginalDecl()))
+    if (!IsStructurallyEquivalent(Context, TT1->getDecl(), TT2->getDecl()))
       return false;
     break;
   }
@@ -1531,8 +1530,8 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
   // types
   if (Field1->isAnonymousStructOrUnion() &&
       Field2->isAnonymousStructOrUnion()) {
-    RecordDecl *D1 = Field1->getType()->castAs<RecordType>()->getOriginalDecl();
-    RecordDecl *D2 = Field2->getType()->castAs<RecordType>()->getOriginalDecl();
+    RecordDecl *D1 = Field1->getType()->castAs<RecordType>()->getDecl();
+    RecordDecl *D2 = Field2->getType()->castAs<RecordType>()->getDecl();
     return IsStructurallyEquivalent(Context, D1, D2);
   }
 
@@ -2587,7 +2586,7 @@ StructuralEquivalenceContext::findUntaggedStructOrUnionIndex(RecordDecl *Anon) {
     // struct { ... } A;
     QualType FieldType = F->getType();
     if (const auto *RecType = dyn_cast<RecordType>(FieldType)) {
-      const RecordDecl *RecDecl = RecType->getOriginalDecl();
+      const RecordDecl *RecDecl = RecType->getDecl();
       if (RecDecl->getDeclContext() == Owner && !RecDecl->getIdentifier()) {
         if (Context.hasSameType(FieldType, AnonTy))
           break;
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index c71fd22..74cae03 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4660,7 +4660,7 @@ const RecordType *Compiler<Emitter>::getRecordTy(QualType Ty) {
 
 template <class Emitter> Record *Compiler<Emitter>::getRecord(QualType Ty) {
   if (const auto *RecordTy = getRecordTy(Ty))
-    return getRecord(RecordTy->getOriginalDecl()->getDefinitionOrSelf());
+    return getRecord(RecordTy->getDecl()->getDefinitionOrSelf());
   return nullptr;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2d3cb6a..b69f360 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2790,6 +2790,34 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC,
+                                        const CallExpr *Call) {
+  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
+  const Pointer &Control = S.Stk.pop<Pointer>();
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  unsigned NumElems = Dst.getNumElems();
+  assert(NumElems == Control.getNumElems());
+  assert(NumElems == Dst.getNumElems());
+
+  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
+    uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx));
+
+    if (Ctlb & 0x80) {
+      Dst.elem<int8_t>(Idx) = 0;
+    } else {
+      unsigned LaneBase = (Idx / 16) * 16;
+      unsigned SrcOffset = Ctlb & 0x0F;
+      unsigned SrcIdx = LaneBase + SrcOffset;
+
+      Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx);
+    }
+  }
+  Dst.initializeAllElements();
+  return true;
+}
+
 static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
                                        const CallExpr *Call, bool IsShufHW) {
   assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
@@ -3943,6 +3971,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_selectpd_512:
     return interp__builtin_select(S, OpPC, Call);
 
+  case X86::BI__builtin_ia32_pshufb128:
+  case X86::BI__builtin_ia32_pshufb256:
+  case X86::BI__builtin_ia32_pshufb512:
+    return interp__builtin_ia32_pshufb(S, OpPC, Call);
+
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h
index a13244b..e2e4d5c 100644
--- a/clang/lib/AST/ByteCode/InterpState.h
+++ b/clang/lib/AST/ByteCode/InterpState.h
@@ -114,7 +114,7 @@ public:
       Alloc = std::make_unique<DynamicAllocator>();
     }
 
-    return *Alloc.get();
+    return *Alloc;
   }
 
   /// Diagnose any dynamic allocations that haven't been freed yet.
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 663134c..e417bdf 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -751,7 +751,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
       assert(Record && "Missing record descriptor");
 
       bool Ok = true;
-      if (RT->getOriginalDecl()->isUnion()) {
+      if (RT->getDecl()->isUnion()) {
         const FieldDecl *ActiveField = nullptr;
         APValue Value;
         for (const auto &F : Record->fields()) {
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 69cbf6e..f048076 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -2989,10 +2989,7 @@ bool ParmVarDecl::isDestroyedInCallee() const {
   // FIXME: isParamDestroyedInCallee() should probably imply
   // isDestructedType()
   const auto *RT = getType()->getAsCanonical<RecordType>();
-  if (RT &&
-      RT->getOriginalDecl()
-          ->getDefinitionOrSelf()
-          ->isParamDestroyedInCallee() &&
+  if (RT && RT->getDecl()->getDefinitionOrSelf()->isParamDestroyedInCallee() &&
       getType().isDestructedType())
     return true;
 
@@ -3507,7 +3504,7 @@ bool FunctionDecl::isUsableAsGlobalAllocationFunctionInConstantEvaluation(
     while (const auto *TD = T->getAs<TypedefType>())
       T = TD->getDecl()->getUnderlyingType();
     const IdentifierInfo *II =
-        T->castAsCanonical<EnumType>()->getOriginalDecl()->getIdentifier();
+        T->castAsCanonical<EnumType>()->getDecl()->getIdentifier();
     if (II && II->isStr("__hot_cold_t"))
       Consume();
   }
@@ -4709,7 +4706,7 @@ bool FieldDecl::isAnonymousStructOrUnion() const {
     return false;
 
   if (const auto *Record = getType()->getAsCanonical<RecordType>())
-    return Record->getOriginalDecl()->isAnonymousStructOrUnion();
+    return Record->getDecl()->isAnonymousStructOrUnion();
 
   return false;
 }
@@ -4769,7 +4766,7 @@ bool FieldDecl::isZeroSize(const ASTContext &Ctx) const {
   const auto *RT = getType()->getAsCanonical<RecordType>();
   if (!RT)
     return false;
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinition();
+  const RecordDecl *RD = RT->getDecl()->getDefinition();
   if (!RD) {
     assert(isInvalidDecl() && "valid field has incomplete type");
     return false;
@@ -5194,7 +5191,7 @@ bool RecordDecl::isOrContainsUnion() const {
   if (const RecordDecl *Def = getDefinition()) {
     for (const FieldDecl *FD : Def->fields()) {
       const RecordType *RT = FD->getType()->getAsCanonical<RecordType>();
-      if (RT && RT->getOriginalDecl()->isOrContainsUnion())
+      if (RT && RT->getDecl()->isOrContainsUnion())
         return true;
     }
   }
@@ -5692,14 +5689,14 @@ void TypedefNameDecl::anchor() {}
 
 TagDecl *TypedefNameDecl::getAnonDeclWithTypedefName(bool AnyRedecl) const {
   if (auto *TT = getTypeSourceInfo()->getType()->getAs<TagType>()) {
-    auto *OwningTypedef = TT->getOriginalDecl()->getTypedefNameForAnonDecl();
+    auto *OwningTypedef = TT->getDecl()->getTypedefNameForAnonDecl();
     auto *ThisTypedef = this;
     if (AnyRedecl && OwningTypedef) {
       OwningTypedef = OwningTypedef->getCanonicalDecl();
       ThisTypedef = ThisTypedef->getCanonicalDecl();
     }
     if (OwningTypedef == ThisTypedef)
-      return TT->getOriginalDecl()->getDefinitionOrSelf();
+      return TT->getDecl()->getDefinitionOrSelf();
   }
 
   return nullptr;
@@ -5708,7 +5705,7 @@ TagDecl *TypedefNameDecl::getAnonDeclWithTypedefName(bool AnyRedecl) const {
 bool TypedefNameDecl::isTransparentTagSlow() const {
   auto determineIsTransparent = [&]() {
     if (auto *TT = getUnderlyingType()->getAs<TagType>()) {
-      if (auto *TD = TT->getOriginalDecl()) {
+      if (auto *TD = TT->getDecl()) {
         if (TD->getName() != getName())
           return false;
         SourceLocation TTLoc = getLocation();
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index b244f0a..30c6d3e 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -77,8 +77,11 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Context,
   *PrefixPtr = ID.getRawValue();
 
   // We leave the upper 16 bits to store the module IDs. 48 bits should be
-  // sufficient to store a declaration ID.
-  assert(*PrefixPtr < llvm::maskTrailingOnes<uint64_t>(48));
+  // sufficient to store a declaration ID. See the comments in setOwningModuleID
+  // for details.
+  assert((*PrefixPtr < llvm::maskTrailingOnes<uint64_t>(48)) &&
+         "Current Implementation limits the number of module files to not "
+         "exceed 2^16. Contact Clang Developers to remove the limitation.");
 
   return Result;
 }
@@ -122,6 +125,25 @@ unsigned Decl::getOwningModuleID() const {
 
 void Decl::setOwningModuleID(unsigned ID) {
   assert(isFromASTFile() && "Only works on a deserialized declaration");
+  // Currently, we use 64 bits to store the GlobalDeclID and the module ID
+  // to save the space. See `Decl::operator new` for details. To make it,
+  // we split the higher 32 bits to 2 16bits for the module file index of
+  // GlobalDeclID and the module ID. This introduces a limitation that the
+  // number of modules can't exceed 2^16. (The number of module files should be
+  // less than the number of modules).
+  //
+  // It is counter-intuitive to store both the module file index and the
+  // module ID as it seems redundant. However, this is not true.
+  // The module ID may be different from the module file where it is serialized
+  // from for implicit template instantiations. See
+  // https://github.com/llvm/llvm-project/issues/101939
+  //
+  // If we reach the limitation, we have to remove the limitation by asking
+  // every deserialized declaration to pay for yet another 32 bits, or we have
+  // to review the above issue to decide what we should do for it.
+  assert((ID < llvm::maskTrailingOnes<unsigned>(16)) &&
+         "Current Implementation limits the number of modules to not exceed "
+         "2^16. Contact Clang Developers to remove the limitation.");
   uint64_t *IDAddress = (uint64_t *)this - 1;
   *IDAddress &= llvm::maskTrailingOnes<uint64_t>(48);
   *IDAddress |= (uint64_t)ID << 48;
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 43264f8..24e4f18 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2314,7 +2314,7 @@ bool CXXRecordDecl::mayBeAbstract() const {
 
   for (const auto &B : bases()) {
     const auto *BaseDecl = cast<CXXRecordDecl>(
-        B.getType()->castAsCanonical<RecordType>()->getOriginalDecl());
+        B.getType()->castAsCanonical<RecordType>()->getDecl());
     if (BaseDecl->isAbstract())
       return true;
   }
diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 7f3dcca..47ae613 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -485,7 +485,7 @@ void DeclPrinter::VisitDeclContext(DeclContext *DC, bool Indent) {
       QualType BaseType = GetBaseType(CurDeclType);
       if (const auto *TT = dyn_cast_or_null<TagType>(BaseType);
           TT && TT->isTagOwned()) {
-        if (TT->getOriginalDecl() == Decls[0]) {
+        if (TT->getDecl() == Decls[0]) {
           Decls.push_back(*D);
           continue;
         }
diff --git a/clang/lib/AST/DeclarationName.cpp b/clang/lib/AST/DeclarationName.cpp
index 55f5a99..9a89a66 100644
--- a/clang/lib/AST/DeclarationName.cpp
+++ b/clang/lib/AST/DeclarationName.cpp
@@ -116,12 +116,12 @@ static void printCXXConstructorDestructorName(QualType ClassType,
   Policy.SuppressScope = true;
 
   if (const RecordType *ClassRec = ClassType->getAsCanonical<RecordType>()) {
-    ClassRec->getOriginalDecl()->printName(OS, Policy);
+    ClassRec->getDecl()->printName(OS, Policy);
     return;
   }
   if (Policy.SuppressTemplateArgsInCXXConstructors) {
     if (auto *InjTy = ClassType->getAsCanonical<InjectedClassNameType>()) {
-      InjTy->getOriginalDecl()->printName(OS, Policy);
+      InjTy->getDecl()->printName(OS, Policy);
       return;
     }
   }
@@ -185,7 +185,7 @@ void DeclarationName::print(raw_ostream &OS,
     OS << "operator ";
     QualType Type = getCXXNameType();
     if (const RecordType *Rec = Type->getAs<RecordType>()) {
-      OS << *Rec->getOriginalDecl();
+      OS << *Rec->getDecl();
       return;
     }
     // We know we're printing C++ here, ensure we print 'bool' properly.
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 597cbd8..340bb4b 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4126,9 +4126,7 @@ Expr::isNullPointerConstant(ASTContext &Ctx,
 
   if (const RecordType *UT = getType()->getAsUnionType())
     if (!Ctx.getLangOpts().CPlusPlus11 && UT &&
-        UT->getOriginalDecl()
-            ->getMostRecentDecl()
-            ->hasAttr<TransparentUnionAttr>())
+        UT->getDecl()->getMostRecentDecl()->hasAttr<TransparentUnionAttr>())
       if (const CompoundLiteralExpr *CLE = dyn_cast<CompoundLiteralExpr>(this)){
         const Expr *InitExpr = CLE->getInitializer();
         if (const InitListExpr *ILE = dyn_cast<InitListExpr>(InitExpr))
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 51c0382..a07eb22 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -4074,8 +4074,7 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
       }
 
       // Next subobject is a class, struct or union field.
-      RecordDecl *RD =
-          ObjType->castAsCanonical<RecordType>()->getOriginalDecl();
+      RecordDecl *RD = ObjType->castAsCanonical<RecordType>()->getDecl();
       if (RD->isUnion()) {
         const FieldDecl *UnionField = O->getUnionField();
         if (!UnionField ||
@@ -7810,7 +7809,7 @@ class BufferToAPValueConverter {
 
   std::optional<APValue> visit(const EnumType *Ty, CharUnits Offset) {
     QualType RepresentationType =
-        Ty->getOriginalDecl()->getDefinitionOrSelf()->getIntegerType();
+        Ty->getDecl()->getDefinitionOrSelf()->getIntegerType();
     assert(!RepresentationType.isNull() &&
            "enum forward decl should be caught by Sema");
     const auto *AsBuiltin =
@@ -8607,7 +8606,7 @@ public:
     const FieldDecl *FD = dyn_cast<FieldDecl>(E->getMemberDecl());
     if (!FD) return Error(E);
     assert(!FD->getType()->isReferenceType() && "prvalue reference?");
-    assert(BaseTy->castAsCanonical<RecordType>()->getOriginalDecl() ==
+    assert(BaseTy->castAsCanonical<RecordType>()->getDecl() ==
                FD->getParent()->getCanonicalDecl() &&
            "record / field mismatch");
 
@@ -8836,7 +8835,7 @@ public:
 
     const ValueDecl *MD = E->getMemberDecl();
     if (const FieldDecl *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) {
-      assert(BaseTy->castAsCanonical<RecordType>()->getOriginalDecl() ==
+      assert(BaseTy->castAsCanonical<RecordType>()->getDecl() ==
                  FD->getParent()->getCanonicalDecl() &&
              "record / field mismatch");
       (void)BaseTy;
@@ -11619,6 +11618,44 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
   return true;
 }
 
+static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,
+                              APValue &Out) {
+  APValue SrcVec, ControlVec;
+  if (!EvaluateAsRValue(Info, Call->getArg(0), SrcVec))
+    return false;
+  if (!EvaluateAsRValue(Info, Call->getArg(1), ControlVec))
+    return false;
+
+  const auto *VT = Call->getType()->getAs<VectorType>();
+  if (!VT)
+    return false;
+
+  QualType ElemT = VT->getElementType();
+  unsigned NumElts = VT->getNumElements();
+
+  SmallVector<APValue, 64> ResultElements;
+  ResultElements.reserve(NumElts);
+
+  for (unsigned Idx = 0; Idx != NumElts; ++Idx) {
+    APValue CtlVal = ControlVec.getVectorElt(Idx);
+    APSInt CtlByte = CtlVal.getInt();
+    uint8_t Ctl = static_cast<uint8_t>(CtlByte.getZExtValue());
+
+    if (Ctl & 0x80) {
+      APValue Zero(Info.Ctx.MakeIntValue(0, ElemT));
+      ResultElements.push_back(Zero);
+    } else {
+      unsigned LaneBase = (Idx / 16) * 16;
+      unsigned SrcOffset = Ctl & 0x0F;
+      unsigned SrcIdx = LaneBase + SrcOffset;
+
+      ResultElements.push_back(SrcVec.getVectorElt(SrcIdx));
+    }
+  }
+  Out = APValue(ResultElements.data(), ResultElements.size());
+  return true;
+}
+
 static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
                              bool IsShufHW, APValue &Out) {
   APValue Vec;
@@ -12241,6 +12278,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
 
+  case X86::BI__builtin_ia32_pshufb128:
+  case X86::BI__builtin_ia32_pshufb256:
+  case X86::BI__builtin_ia32_pshufb512: {
+    APValue R;
+    if (!evalPshufbBuiltin(Info, E, R))
+      return false;
+    return Success(R, E);
+  }
+
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512: {
diff --git a/clang/lib/AST/InheritViz.cpp b/clang/lib/AST/InheritViz.cpp
index 3c4a5a8..c5f4c2b 100644
--- a/clang/lib/AST/InheritViz.cpp
+++ b/clang/lib/AST/InheritViz.cpp
@@ -89,8 +89,8 @@ void InheritanceHierarchyWriter::WriteNode(QualType Type, bool FromVirtual) {
   Out << " \"];\n";
 
   // Display the base classes.
-  const auto *Decl = cast<CXXRecordDecl>(
-      Type->castAsCanonical<RecordType>()->getOriginalDecl());
+  const auto *Decl =
+      cast<CXXRecordDecl>(Type->castAsCanonical<RecordType>()->getDecl());
   for (const auto &Base : Decl->bases()) {
     QualType CanonBaseType = Context.getCanonicalType(Base.getType());
 
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 844db79..5572e0a 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -2491,7 +2491,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
   case Type::Enum:
   case Type::Record:
     mangleSourceNameWithAbiTags(
-        cast<TagType>(Ty)->getOriginalDecl()->getDefinitionOrSelf());
+        cast<TagType>(Ty)->getDecl()->getDefinitionOrSelf());
     break;
 
   case Type::TemplateSpecialization: {
@@ -2556,9 +2556,8 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
   }
 
   case Type::InjectedClassName:
-    mangleSourceNameWithAbiTags(cast<InjectedClassNameType>(Ty)
-                                    ->getOriginalDecl()
-                                    ->getDefinitionOrSelf());
+    mangleSourceNameWithAbiTags(
+        cast<InjectedClassNameType>(Ty)->getDecl()->getDefinitionOrSelf());
     break;
 
   case Type::DependentName:
@@ -3795,7 +3794,7 @@ void CXXNameMangler::mangleType(const RecordType *T) {
   mangleType(static_cast<const TagType*>(T));
 }
 void CXXNameMangler::mangleType(const TagType *T) {
-  mangleName(T->getOriginalDecl()->getDefinitionOrSelf());
+  mangleName(T->getDecl()->getDefinitionOrSelf());
 }
 
 // <type>       ::= <array-type>
@@ -4430,8 +4429,8 @@ void CXXNameMangler::mangleType(const InjectedClassNameType *T) {
   // Mangle injected class name types as if the user had written the
   // specialization out fully.  It may not actually be possible to see
   // this mangling, though.
-  mangleType(T->getOriginalDecl()->getCanonicalTemplateSpecializationType(
-      getASTContext()));
+  mangleType(
+      T->getDecl()->getCanonicalTemplateSpecializationType(getASTContext()));
 }
 
 void CXXNameMangler::mangleType(const TemplateSpecializationType *T) {
@@ -4692,7 +4691,7 @@ void CXXNameMangler::mangleIntegerLiteral(QualType T,
 void CXXNameMangler::mangleMemberExprBase(const Expr *Base, bool IsArrow) {
   // Ignore member expressions involving anonymous unions.
   while (const auto *RT = Base->getType()->getAsCanonical<RecordType>()) {
-    if (!RT->getOriginalDecl()->isAnonymousStructOrUnion())
+    if (!RT->getDecl()->isAnonymousStructOrUnion())
       break;
     const auto *ME = dyn_cast<MemberExpr>(Base);
     if (!ME)
@@ -7010,8 +7009,7 @@ bool CXXNameMangler::isSpecializedAs(QualType S, llvm::StringRef Name,
   if (!RT)
     return false;
 
-  const ClassTemplateSpecializationDecl *SD =
-      dyn_cast<ClassTemplateSpecializationDecl>(RT->getOriginalDecl());
+  const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl());
   if (!SD || !SD->getIdentifier()->isStr(Name))
     return false;
 
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 0ef6328..9f4dba9 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -396,7 +396,7 @@ llvm::json::Array JSONNodeDumper::createCastPath(const CastExpr *C) {
   for (auto I = C->path_begin(), E = C->path_end(); I != E; ++I) {
     const CXXBaseSpecifier *Base = *I;
     const auto *RD = cast<CXXRecordDecl>(
-        Base->getType()->castAsCanonical<RecordType>()->getOriginalDecl());
+        Base->getType()->castAsCanonical<RecordType>()->getDecl());
 
     llvm::json::Object Val{{"name", RD->getName()}};
     if (Base->isVirtual())
@@ -764,7 +764,7 @@ void JSONNodeDumper::VisitTagType(const TagType *TT) {
     Qualifier.print(OS, PrintPolicy, /*ResolveTemplateArguments=*/true);
     JOS.attribute("qualifier", Str);
   }
-  JOS.attribute("decl", createBareDeclRef(TT->getOriginalDecl()));
+  JOS.attribute("decl", createBareDeclRef(TT->getDecl()));
   if (TT->isTagOwned())
     JOS.attribute("isTagOwned", true);
 }
@@ -816,7 +816,7 @@ void JSONNodeDumper::VisitTemplateSpecializationType(
 
 void JSONNodeDumper::VisitInjectedClassNameType(
     const InjectedClassNameType *ICNT) {
-  JOS.attribute("decl", createBareDeclRef(ICNT->getOriginalDecl()));
+  JOS.attribute("decl", createBareDeclRef(ICNT->getDecl()));
 }
 
 void JSONNodeDumper::VisitObjCInterfaceType(const ObjCInterfaceType *OIT) {
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 8cbc72b..f1baf9f 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -3248,11 +3248,11 @@ void MicrosoftCXXNameMangler::mangleTagTypeKind(TagTypeKind TTK) {
 }
 void MicrosoftCXXNameMangler::mangleType(const EnumType *T, Qualifiers,
                                          SourceRange) {
-  mangleType(cast<TagType>(T)->getOriginalDecl());
+  mangleType(cast<TagType>(T)->getDecl());
 }
 void MicrosoftCXXNameMangler::mangleType(const RecordType *T, Qualifiers,
                                          SourceRange) {
-  mangleType(cast<TagType>(T)->getOriginalDecl());
+  mangleType(cast<TagType>(T)->getDecl());
 }
 void MicrosoftCXXNameMangler::mangleType(const TagDecl *TD) {
   // MSVC chooses the tag kind of the definition if it exists, otherwise it
diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp
index 6842038..46a4e25 100644
--- a/clang/lib/AST/ODRHash.cpp
+++ b/clang/lib/AST/ODRHash.cpp
@@ -913,7 +913,7 @@ public:
       return false;
 
     if (TypedefT->getDecl()->getIdentifier() !=
-        TagT->getOriginalDecl()->getIdentifier())
+        TagT->getDecl()->getIdentifier())
       return false;
 
     ID.AddInteger(TagT->getTypeClass());
@@ -1059,7 +1059,7 @@ public:
   }
 
   void VisitInjectedClassNameType(const InjectedClassNameType *T) {
-    AddDecl(T->getOriginalDecl()->getDefinitionOrSelf());
+    AddDecl(T->getDecl()->getDefinitionOrSelf());
     VisitType(T);
   }
 
@@ -1164,7 +1164,7 @@ public:
     AddNestedNameSpecifier(ElaboratedOverride
                                ? ElaboratedOverride->getQualifier()
                                : T->getQualifier());
-    AddDecl(T->getOriginalDecl()->getDefinitionOrSelf());
+    AddDecl(T->getDecl()->getDefinitionOrSelf());
     VisitType(T);
   }
 
diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index acc011c..7138dff 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -20,36 +20,6 @@
 
 using namespace clang;
 
-ParentMapContext::ParentMapContext(ASTContext &Ctx) : ASTCtx(Ctx) {}
-
-ParentMapContext::~ParentMapContext() = default;
-
-void ParentMapContext::clear() { Parents.reset(); }
-
-const Expr *ParentMapContext::traverseIgnored(const Expr *E) const {
-  return traverseIgnored(const_cast<Expr *>(E));
-}
-
-Expr *ParentMapContext::traverseIgnored(Expr *E) const {
-  if (!E)
-    return nullptr;
-
-  switch (Traversal) {
-  case TK_AsIs:
-    return E;
-  case TK_IgnoreUnlessSpelledInSource:
-    return E->IgnoreUnlessSpelledInSource();
-  }
-  llvm_unreachable("Invalid Traversal type!");
-}
-
-DynTypedNode ParentMapContext::traverseIgnored(const DynTypedNode &N) const {
-  if (const auto *E = N.get<Expr>()) {
-    return DynTypedNode::create(*traverseIgnored(E));
-  }
-  return N;
-}
-
 template <typename T, typename... U>
 static std::tuple<bool, DynTypedNodeList, const T *, const U *...>
 matchParents(const DynTypedNodeList &NodeList,
@@ -334,6 +304,36 @@ matchParents(const DynTypedNodeList &NodeList,
   return MatchParents<T, U...>::match(NodeList, ParentMap);
 }
 
+ParentMapContext::ParentMapContext(ASTContext &Ctx) : ASTCtx(Ctx) {}
+
+ParentMapContext::~ParentMapContext() = default;
+
+void ParentMapContext::clear() { Parents.reset(); }
+
+const Expr *ParentMapContext::traverseIgnored(const Expr *E) const {
+  return traverseIgnored(const_cast<Expr *>(E));
+}
+
+Expr *ParentMapContext::traverseIgnored(Expr *E) const {
+  if (!E)
+    return nullptr;
+
+  switch (Traversal) {
+  case TK_AsIs:
+    return E;
+  case TK_IgnoreUnlessSpelledInSource:
+    return E->IgnoreUnlessSpelledInSource();
+  }
+  llvm_unreachable("Invalid Traversal type!");
+}
+
+DynTypedNode ParentMapContext::traverseIgnored(const DynTypedNode &N) const {
+  if (const auto *E = N.get<Expr>()) {
+    return DynTypedNode::create(*traverseIgnored(E));
+  }
+  return N;
+}
+
 /// Template specializations to abstract away from pointers and TypeLocs.
 /// @{
 template <typename T> static DynTypedNode createDynTypedNode(const T &Node) {
diff --git a/clang/lib/AST/QualTypeNames.cpp b/clang/lib/AST/QualTypeNames.cpp
index a2f9309..1918416 100644
--- a/clang/lib/AST/QualTypeNames.cpp
+++ b/clang/lib/AST/QualTypeNames.cpp
@@ -125,7 +125,7 @@ static const Type *getFullyQualifiedTemplateType(const ASTContext &Ctx,
   // which can point to a template instantiation with no sugar in any of
   // its template argument, however we still need to fully qualify them.
 
-  const auto *TD = TSTRecord->getOriginalDecl();
+  const auto *TD = TSTRecord->getDecl();
   const auto *TSTDecl = dyn_cast<ClassTemplateSpecializationDecl>(TD);
   if (!TSTDecl)
     return Ctx.getTagType(Keyword, Qualifier, TD, /*OwnsTag=*/false)
@@ -232,7 +232,7 @@ static NestedNameSpecifier getFullyQualifiedNestedNameSpecifier(
     // Find decl context.
     const TypeDecl *TD;
     if (const TagType *TagDeclType = Type->getAs<TagType>())
-      TD = TagDeclType->getOriginalDecl();
+      TD = TagDeclType->getDecl();
     else if (const auto *D = dyn_cast<TypedefType>(Type))
       TD = D->getDecl();
     else
@@ -316,7 +316,7 @@ createNestedNameSpecifierForScopeOf(const ASTContext &Ctx, const Type *TypePtr,
   if (const auto *TDT = dyn_cast<TypedefType>(TypePtr)) {
     Decl = TDT->getDecl();
   } else if (const auto *TagDeclType = dyn_cast<TagType>(TypePtr)) {
-    Decl = TagDeclType->getOriginalDecl();
+    Decl = TagDeclType->getDecl();
   } else if (const auto *TST = dyn_cast<TemplateSpecializationType>(TypePtr)) {
     Decl = TST->getTemplateName().getAsTemplateDecl();
   } else {
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 00b938b..ac18d4d 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -2009,7 +2009,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
     } else if (const BuiltinType *BTy = BaseTy->getAs<BuiltinType>()) {
       performBuiltinTypeAlignmentUpgrade(BTy);
     } else if (const RecordType *RT = BaseTy->getAsCanonical<RecordType>()) {
-      const RecordDecl *RD = RT->getOriginalDecl();
+      const RecordDecl *RD = RT->getDecl();
       const ASTRecordLayout &FieldRecord = Context.getASTRecordLayout(RD);
       PreferredAlign = FieldRecord.getPreferredAlignment();
     }
@@ -2710,7 +2710,7 @@ MicrosoftRecordLayoutBuilder::getAdjustedElementInfo(
     if (const auto *RT = FD->getType()
                              ->getBaseElementTypeUnsafe()
                              ->getAsCanonical<RecordType>()) {
-      auto const &Layout = Context.getASTRecordLayout(RT->getOriginalDecl());
+      auto const &Layout = Context.getASTRecordLayout(RT->getDecl());
       EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject();
       FieldRequiredAlignment = std::max(FieldRequiredAlignment,
                                         Layout.getRequiredAlignment());
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index cf5e914..41aebdb 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -1401,7 +1401,7 @@ static void dumpBasePath(raw_ostream &OS, const CastExpr *Node) {
       OS << " -> ";
 
     const auto *RD = cast<CXXRecordDecl>(
-        Base->getType()->castAsCanonical<RecordType>()->getOriginalDecl());
+        Base->getType()->castAsCanonical<RecordType>()->getDecl());
 
     if (Base->isVirtual())
       OS << "virtual ";
@@ -2180,7 +2180,7 @@ void TextNodeDumper::VisitTagType(const TagType *T) {
       K != ElaboratedTypeKeyword::None)
     OS << ' ' << TypeWithKeyword::getKeywordName(K);
   dumpNestedNameSpecifier(T->getQualifier());
-  dumpDeclRef(T->getOriginalDecl());
+  dumpDeclRef(T->getDecl());
 }
 
 void TextNodeDumper::VisitTemplateTypeParmType(const TemplateTypeParmType *T) {
@@ -2232,7 +2232,7 @@ void TextNodeDumper::VisitTemplateSpecializationType(
 
 void TextNodeDumper::VisitInjectedClassNameType(
     const InjectedClassNameType *T) {
-  dumpDeclRef(T->getOriginalDecl());
+  dumpDeclRef(T->getDecl());
 }
 
 void TextNodeDumper::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index ee7a68e..4548af1 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -114,7 +114,7 @@ const IdentifierInfo *QualType::getBaseTypeIdentifier() const {
   if (ty->isPointerOrReferenceType())
     return ty->getPointeeType().getBaseTypeIdentifier();
   if (const auto *TT = ty->getAs<TagType>())
-    ND = TT->getOriginalDecl();
+    ND = TT->getDecl();
   else if (ty->getTypeClass() == Type::Typedef)
     ND = ty->castAs<TypedefType>()->getDecl();
   else if (ty->isArrayType())
@@ -671,13 +671,13 @@ const Type *Type::getUnqualifiedDesugaredType() const {
 
 bool Type::isClassType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()->isClass();
+    return RT->getDecl()->isClass();
   return false;
 }
 
 bool Type::isStructureType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()->isStruct();
+    return RT->getDecl()->isStruct();
   return false;
 }
 
@@ -685,7 +685,7 @@ bool Type::isStructureTypeWithFlexibleArrayMember() const {
   const auto *RT = getAsCanonical<RecordType>();
   if (!RT)
     return false;
-  const auto *Decl = RT->getOriginalDecl();
+  const auto *Decl = RT->getDecl();
   if (!Decl->isStruct())
     return false;
   return Decl->getDefinitionOrSelf()->hasFlexibleArrayMember();
@@ -699,13 +699,13 @@ bool Type::isObjCBoxableRecordType() const {
 
 bool Type::isInterfaceType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()->isInterface();
+    return RT->getDecl()->isInterface();
   return false;
 }
 
 bool Type::isStructureOrClassType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()->isStructureOrClass();
+    return RT->getDecl()->isStructureOrClass();
   return false;
 }
 
@@ -717,7 +717,7 @@ bool Type::isVoidPointerType() const {
 
 bool Type::isUnionType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()->isUnion();
+    return RT->getDecl()->isUnion();
   return false;
 }
 
@@ -734,7 +734,7 @@ bool Type::isComplexIntegerType() const {
 
 bool Type::isScopedEnumeralType() const {
   if (const auto *ET = getAsCanonical<EnumType>())
-    return ET->getOriginalDecl()->isScoped();
+    return ET->getDecl()->isScoped();
   return false;
 }
 
@@ -768,13 +768,13 @@ QualType Type::getPointeeType() const {
 const RecordType *Type::getAsStructureType() const {
   // If this is directly a structure type, return it.
   if (const auto *RT = dyn_cast<RecordType>(this)) {
-    if (RT->getOriginalDecl()->isStruct())
+    if (RT->getDecl()->isStruct())
       return RT;
   }
 
   // If the canonical form of this type isn't the right kind, reject it.
   if (const auto *RT = dyn_cast<RecordType>(CanonicalType)) {
-    if (!RT->getOriginalDecl()->isStruct())
+    if (!RT->getDecl()->isStruct())
       return nullptr;
 
     // If this is a typedef for a structure type, strip the typedef off without
@@ -787,13 +787,13 @@ const RecordType *Type::getAsStructureType() const {
 const RecordType *Type::getAsUnionType() const {
   // If this is directly a union type, return it.
   if (const auto *RT = dyn_cast<RecordType>(this)) {
-    if (RT->getOriginalDecl()->isUnion())
+    if (RT->getDecl()->isUnion())
       return RT;
   }
 
   // If the canonical form of this type isn't the right kind, reject it.
   if (const auto *RT = dyn_cast<RecordType>(CanonicalType)) {
-    if (!RT->getOriginalDecl()->isUnion())
+    if (!RT->getDecl()->isUnion())
       return nullptr;
 
     // If this is a typedef for a union type, strip the typedef off without
@@ -2107,7 +2107,7 @@ bool Type::isIntegralType(const ASTContext &Ctx) const {
   // Complete enum types are integral in C.
   if (!Ctx.getLangOpts().CPlusPlus)
     if (const auto *ET = dyn_cast<EnumType>(CanonicalType))
-      return IsEnumDeclComplete(ET->getOriginalDecl());
+      return IsEnumDeclComplete(ET->getDecl());
 
   return isBitIntType();
 }
@@ -2124,7 +2124,7 @@ bool Type::isIntegralOrUnscopedEnumerationType() const {
 
 bool Type::isUnscopedEnumerationType() const {
   if (const auto *ET = dyn_cast<EnumType>(CanonicalType))
-    return !ET->getOriginalDecl()->isScoped();
+    return !ET->getDecl()->isScoped();
 
   return false;
 }
@@ -2328,7 +2328,7 @@ bool Type::isRealType() const {
     return BT->getKind() >= BuiltinType::Bool &&
            BT->getKind() <= BuiltinType::Ibm128;
   if (const auto *ET = dyn_cast<EnumType>(CanonicalType)) {
-    const auto *ED = ET->getOriginalDecl();
+    const auto *ED = ET->getDecl();
     return !ED->isScoped() && ED->getDefinitionOrSelf()->isComplete();
   }
   return isBitIntType();
@@ -2345,7 +2345,7 @@ bool Type::isArithmeticType() const {
     // C++0x: Enumerations are not arithmetic types. For now, just return
     // false for scoped enumerations since that will disable any
     // unwanted implicit conversions.
-    const auto *ED = ET->getOriginalDecl();
+    const auto *ED = ET->getDecl();
     return !ED->isScoped() && ED->getDefinitionOrSelf()->isComplete();
   }
   return isa<ComplexType>(CanonicalType) || isBitIntType();
@@ -2410,8 +2410,7 @@ Type::ScalarTypeKind Type::getScalarTypeKind() const {
 /// includes union types.
 bool Type::isAggregateType() const {
   if (const auto *Record = dyn_cast<RecordType>(CanonicalType)) {
-    if (const auto *ClassDecl =
-            dyn_cast<CXXRecordDecl>(Record->getOriginalDecl()))
+    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(Record->getDecl()))
       return ClassDecl->isAggregate();
 
     return true;
@@ -2746,8 +2745,8 @@ bool QualType::isCXX98PODType(const ASTContext &Context) const {
     return true;
 
   case Type::Record:
-    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(
-            cast<RecordType>(CanonicalType)->getOriginalDecl()))
+    if (const auto *ClassDecl =
+            dyn_cast<CXXRecordDecl>(cast<RecordType>(CanonicalType)->getDecl()))
       return ClassDecl->isPOD();
 
     // C struct/union is POD.
@@ -3179,7 +3178,7 @@ bool Type::isNothrowT() const {
 
 bool Type::isAlignValT() const {
   if (const auto *ET = getAsCanonical<EnumType>()) {
-    const auto *ED = ET->getOriginalDecl();
+    const auto *ED = ET->getDecl();
     IdentifierInfo *II = ED->getIdentifier();
     if (II && II->isStr("align_val_t") && ED->isInStdNamespace())
       return true;
@@ -3189,7 +3188,7 @@ bool Type::isAlignValT() const {
 
 bool Type::isStdByteType() const {
   if (const auto *ET = getAsCanonical<EnumType>()) {
-    const auto *ED = ET->getOriginalDecl();
+    const auto *ED = ET->getDecl();
     IdentifierInfo *II = ED->getIdentifier();
     if (II && II->isStr("byte") && ED->isInStdNamespace())
       return true;
@@ -4321,7 +4320,7 @@ bool RecordType::hasConstFields() const {
 
   while (RecordTypeList.size() > NextToCheckIndex) {
     for (FieldDecl *FD : RecordTypeList[NextToCheckIndex]
-                             ->getOriginalDecl()
+                             ->getDecl()
                              ->getDefinitionOrSelf()
                              ->fields()) {
       QualType FieldTy = FD->getType();
@@ -4815,8 +4814,7 @@ static CachedProperties computeCachedProperties(const Type *T) {
 
   case Type::Record:
   case Type::Enum: {
-    const TagDecl *Tag =
-        cast<TagType>(T)->getOriginalDecl()->getDefinitionOrSelf();
+    const auto *Tag = cast<TagType>(T)->getDecl()->getDefinitionOrSelf();
 
     // C++ [basic.link]p8:
     //     - it is a class or enumeration type that is named (or has a name
@@ -4926,7 +4924,7 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) {
   case Type::Record:
   case Type::Enum:
     return getDeclLinkageAndVisibility(
-        cast<TagType>(T)->getOriginalDecl()->getDefinitionOrSelf());
+        cast<TagType>(T)->getDecl()->getDefinitionOrSelf());
 
   case Type::Complex:
     return computeTypeLinkageInfo(cast<ComplexType>(T)->getElementType());
@@ -5129,7 +5127,7 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const {
     llvm_unreachable("unknown builtin type");
 
   case Type::Record: {
-    const RecordDecl *RD = cast<RecordType>(type)->getOriginalDecl();
+    const auto *RD = cast<RecordType>(type)->getDecl();
     // For template specializations, look only at primary template attributes.
     // This is a consistent regardless of whether the instantiation is known.
     if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD))
@@ -5327,7 +5325,7 @@ bool Type::isCARCBridgableType() const {
 /// Check if the specified type is the CUDA device builtin surface type.
 bool Type::isCUDADeviceBuiltinSurfaceType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()
+    return RT->getDecl()
         ->getMostRecentDecl()
         ->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>();
   return false;
@@ -5336,7 +5334,7 @@ bool Type::isCUDADeviceBuiltinSurfaceType() const {
 /// Check if the specified type is the CUDA device builtin texture type.
 bool Type::isCUDADeviceBuiltinTextureType() const {
   if (const auto *RT = getAsCanonical<RecordType>())
-    return RT->getOriginalDecl()
+    return RT->getDecl()
         ->getMostRecentDecl()
         ->hasAttr<CUDADeviceBuiltinTextureTypeAttr>();
   return false;
diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp
index e952e82..f54ccf0 100644
--- a/clang/lib/AST/TypeLoc.cpp
+++ b/clang/lib/AST/TypeLoc.cpp
@@ -303,8 +303,7 @@ bool TypeSpecTypeLoc::isKind(const TypeLoc &TL) {
 }
 
 bool TagTypeLoc::isDefinition() const {
-  return getTypePtr()->isTagOwned() &&
-         getOriginalDecl()->isCompleteDefinition();
+  return getTypePtr()->isTagOwned() && getDecl()->isCompleteDefinition();
 }
 
 // Reimplemented to account for GNU/C++ extension
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 66a1b68..2da7789 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1421,7 +1421,7 @@ void TypePrinter::printDeducedTemplateSpecializationBefore(
     } else {
       // Should only get here for canonical types.
       const auto *CD = cast<ClassTemplateSpecializationDecl>(
-          cast<RecordType>(T->getDeducedType())->getOriginalDecl());
+          cast<RecordType>(T->getDeducedType())->getDecl());
       DeducedTD = CD->getSpecializedTemplate();
       Args = CD->getTemplateArgs().asArray();
     }
@@ -1565,7 +1565,7 @@ void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS,
 }
 
 void TypePrinter::printTagType(const TagType *T, raw_ostream &OS) {
-  TagDecl *D = T->getOriginalDecl();
+  TagDecl *D = T->getDecl();
 
   if (Policy.IncludeTagDefinition && T->isTagOwned()) {
     D->print(OS, Policy, Indentation);
@@ -1669,11 +1669,11 @@ void TypePrinter::printTagType(const TagType *T, raw_ostream &OS) {
 void TypePrinter::printRecordBefore(const RecordType *T, raw_ostream &OS) {
   // Print the preferred name if we have one for this type.
   if (Policy.UsePreferredNames) {
-    for (const auto *PNA : T->getOriginalDecl()
+    for (const auto *PNA : T->getDecl()
                                ->getMostRecentDecl()
                                ->specific_attrs<PreferredNameAttr>()) {
       if (!declaresSameEntity(PNA->getTypedefType()->getAsCXXRecordDecl(),
-                              T->getOriginalDecl()))
+                              T->getDecl()))
         continue;
       // Find the outermost typedef or alias template.
       QualType T = PNA->getTypedefType();
@@ -1700,11 +1700,11 @@ void TypePrinter::printEnumAfter(const EnumType *T, raw_ostream &OS) {}
 
 void TypePrinter::printInjectedClassNameBefore(const InjectedClassNameType *T,
                                                raw_ostream &OS) {
-  const ASTContext &Ctx = T->getOriginalDecl()->getASTContext();
+  const ASTContext &Ctx = T->getDecl()->getASTContext();
   IncludeStrongLifetimeRAII Strong(Policy);
   T->getTemplateName(Ctx).print(OS, Policy);
   if (Policy.PrintInjectedClassNameWithArguments) {
-    auto *Decl = T->getOriginalDecl();
+    auto *Decl = T->getDecl();
     // FIXME: Use T->getTemplateArgs(Ctx) when that supports as-written
     // arguments.
     if (auto *RD = dyn_cast<ClassTemplateSpecializationDecl>(Decl)) {
diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp
index 6cec526..3ded3a5 100644
--- a/clang/lib/AST/VTableBuilder.cpp
+++ b/clang/lib/AST/VTableBuilder.cpp
@@ -312,13 +312,12 @@ ComputeReturnAdjustmentBaseOffset(ASTContext &Context,
     return BaseOffset();
   }
 
-  const CXXRecordDecl *DerivedRD =
-      cast<CXXRecordDecl>(
-          cast<RecordType>(CanDerivedReturnType)->getOriginalDecl())
+  const auto *DerivedRD =
+      cast<CXXRecordDecl>(cast<RecordType>(CanDerivedReturnType)->getDecl())
           ->getDefinitionOrSelf();
 
-  const CXXRecordDecl *BaseRD = cast<CXXRecordDecl>(
-      cast<RecordType>(CanBaseReturnType)->getOriginalDecl());
+  const auto *BaseRD =
+      cast<CXXRecordDecl>(cast<RecordType>(CanBaseReturnType)->getDecl());
 
   return ComputeBaseOffset(Context, BaseRD, DerivedRD);
 }
diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp
index a56fdb1..77750cf 100644
--- a/clang/lib/Analysis/ThreadSafety.cpp
+++ b/clang/lib/Analysis/ThreadSafety.cpp
@@ -2032,9 +2032,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D,
       assert(inserted.second && "Are we visiting the same expression again?");
       if (isa<CXXConstructExpr>(Exp))
         Self = Placeholder;
-      if (TagT->getOriginalDecl()
-              ->getMostRecentDecl()
-              ->hasAttr<ScopedLockableAttr>())
+      if (TagT->getDecl()->getMostRecentDecl()->hasAttr<ScopedLockableAttr>())
         Scp = CapabilityExpr(Placeholder, Exp->getType(), /*Neg=*/false);
     }
 
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index f034514..641a3db 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/LangStandard.h"
 #include "llvm/Support/Path.h"
 
 using namespace clang;
@@ -243,3 +244,46 @@ LLVM_DUMP_METHOD void FPOptionsOverride::dump() {
 #include "clang/Basic/FPOptions.def"
   llvm::errs() << "\n";
 }
+
+std::optional<uint32_t> LangOptions::getCPlusPlusLangStd() const {
+  if (!CPlusPlus)
+    return std::nullopt;
+
+  LangStandard::Kind Std;
+  if (CPlusPlus26)
+    Std = LangStandard::lang_cxx26;
+  else if (CPlusPlus23)
+    Std = LangStandard::lang_cxx23;
+  else if (CPlusPlus20)
+    Std = LangStandard::lang_cxx20;
+  else if (CPlusPlus17)
+    Std = LangStandard::lang_cxx17;
+  else if (CPlusPlus14)
+    Std = LangStandard::lang_cxx14;
+  else if (CPlusPlus11)
+    Std = LangStandard::lang_cxx11;
+  else
+    Std = LangStandard::lang_cxx98;
+
+  return LangStandard::getLangStandardForKind(Std).getVersion();
+}
+
+std::optional<uint32_t> LangOptions::getCLangStd() const {
+  LangStandard::Kind Std;
+  if (C2y)
+    Std = LangStandard::lang_c2y;
+  else if (C23)
+    Std = LangStandard::lang_c23;
+  else if (C17)
+    Std = LangStandard::lang_c17;
+  else if (C11)
+    Std = LangStandard::lang_c11;
+  else if (C99)
+    Std = LangStandard::lang_c99;
+  else if (!GNUMode && Digraphs)
+    Std = LangStandard::lang_c94;
+  else
+    return std::nullopt;
+
+  return LangStandard::getLangStandardForKind(Std).getVersion();
+}
diff --git a/clang/lib/Basic/LangStandards.cpp b/clang/lib/Basic/LangStandards.cpp
index c49d095..01c524b 100644
--- a/clang/lib/Basic/LangStandards.cpp
+++ b/clang/lib/Basic/LangStandards.cpp
@@ -46,16 +46,18 @@ StringRef clang::languageToString(Language L) {
   llvm_unreachable("unhandled language kind");
 }
 
-#define LANGSTANDARD(id, name, lang, desc, features)                           \
-  static const LangStandard Lang_##id = {name, desc, features, Language::lang};
+#define LANGSTANDARD(id, name, lang, desc, features, version)                  \
+  static const LangStandard Lang_##id = {name, desc, features, Language::lang, \
+                                         version};
 #include "clang/Basic/LangStandards.def"
 
 const LangStandard &LangStandard::getLangStandardForKind(Kind K) {
   switch (K) {
   case lang_unspecified:
     llvm::report_fatal_error("getLangStandardForKind() on unspecified kind");
-#define LANGSTANDARD(id, name, lang, desc, features) \
-    case lang_##id: return Lang_##id;
+#define LANGSTANDARD(id, name, lang, desc, features, version)                  \
+  case lang_##id:                                                              \
+    return Lang_##id;
 #include "clang/Basic/LangStandards.def"
   }
   llvm_unreachable("Invalid language kind!");
@@ -63,7 +65,8 @@ const LangStandard &LangStandard::getLangStandardForKind(Kind K) {
 
 LangStandard::Kind LangStandard::getLangKind(StringRef Name) {
   return llvm::StringSwitch<Kind>(Name)
-#define LANGSTANDARD(id, name, lang, desc, features) .Case(name, lang_##id)
+#define LANGSTANDARD(id, name, lang, desc, features, version)                  \
+  .Case(name, lang_##id)
 #define LANGSTANDARD_ALIAS(id, alias) .Case(alias, lang_##id)
 #include "clang/Basic/LangStandards.def"
       .Default(lang_unspecified);
diff --git a/clang/lib/Basic/Targets/AVR.cpp b/clang/lib/Basic/Targets/AVR.cpp
index bbe7b01..2673669 100644
--- a/clang/lib/Basic/Targets/AVR.cpp
+++ b/clang/lib/Basic/Targets/AVR.cpp
@@ -420,23 +420,23 @@ static MCUInfo AVRMcus[] = {
 
 static bool ArchHasELPM(StringRef Arch) {
   return llvm::StringSwitch<bool>(Arch)
-    .Cases("31", "51", "6", true)
-    .Cases("102", "104", "105", "106", "107", true)
-    .Default(false);
+      .Cases({"31", "51", "6"}, true)
+      .Cases({"102", "104", "105", "106", "107"}, true)
+      .Default(false);
 }
 
 static bool ArchHasELPMX(StringRef Arch) {
   return llvm::StringSwitch<bool>(Arch)
-    .Cases("51", "6", true)
-    .Cases("102", "104", "105", "106", "107", true)
-    .Default(false);
+      .Cases({"51", "6"}, true)
+      .Cases({"102", "104", "105", "106", "107"}, true)
+      .Default(false);
 }
 
 static bool ArchHasMOVW(StringRef Arch) {
   return llvm::StringSwitch<bool>(Arch)
-    .Cases("25", "35", "4", "5", "51", "6", true)
-    .Cases("102", "103", "104", "105", "106", "107", true)
-    .Default(false);
+      .Cases({"25", "35", "4", "5", "51", "6"}, true)
+      .Cases({"102", "103", "104", "105", "106", "107"}, true)
+      .Default(false);
 }
 
 static bool ArchHasLPMX(StringRef Arch) {
@@ -445,16 +445,16 @@ static bool ArchHasLPMX(StringRef Arch) {
 
 static bool ArchHasMUL(StringRef Arch) {
   return llvm::StringSwitch<bool>(Arch)
-    .Cases("4", "5", "51", "6", true)
-    .Cases("102", "103", "104", "105", "106", "107", true)
-    .Default(false);
+      .Cases({"4", "5", "51", "6"}, true)
+      .Cases({"102", "103", "104", "105", "106", "107"}, true)
+      .Default(false);
 }
 
 static bool ArchHasJMPCALL(StringRef Arch) {
   return llvm::StringSwitch<bool>(Arch)
-    .Cases("3", "31", "35", "5", "51", "6", true)
-    .Cases("102", "103", "104", "105", "106", "107", true)
-    .Default(false);
+      .Cases({"3", "31", "35", "5", "51", "6"}, true)
+      .Cases({"102", "103", "104", "105", "106", "107"}, true)
+      .Default(false);
 }
 
 static bool ArchHas3BytePC(StringRef Arch) {
diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h
index bd13c9e..a21a593 100644
--- a/clang/lib/Basic/Targets/DirectX.h
+++ b/clang/lib/Basic/Targets/DirectX.h
@@ -64,8 +64,11 @@ public:
     NoAsmVariants = true;
     PlatformMinVersion = Triple.getOSVersion();
     PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
+    // TODO: We need to align vectors on the element size generally, but for now
+    // we hard code this for 3-element 32- and 64-bit vectors as a workaround.
+    // See https://github.com/llvm/llvm-project/issues/123968
     resetDataLayout("e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:"
-                    "32-f64:64-n8:16:32:64");
+                    "32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64");
     TheCXXABI.set(TargetCXXABI::GenericItanium);
   }
   bool useFP16ConversionIntrinsics() const override { return false; }
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 187815c..ef4973c 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -625,6 +625,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_ArrowlakeS:
   case CK_Lunarlake:
   case CK_Pantherlake:
+  case CK_Wildcatlake:
   case CK_Sierraforest:
   case CK_Grandridge:
   case CK_Graniterapids:
@@ -1613,6 +1614,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
     case CK_ArrowlakeS:
     case CK_Lunarlake:
     case CK_Pantherlake:
+    case CK_Wildcatlake:
     case CK_Sierraforest:
     case CK_Grandridge:
     case CK_Graniterapids:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index a6f10e6..84acc74 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -519,6 +519,14 @@ public:
     return createGlobal(module, loc, uniqueName, type, isConstant, linkage);
   }
 
+  cir::StackSaveOp createStackSave(mlir::Location loc, mlir::Type ty) {
+    return cir::StackSaveOp::create(*this, loc, ty);
+  }
+
+  cir::StackRestoreOp createStackRestore(mlir::Location loc, mlir::Value v) {
+    return cir::StackRestoreOp::create(*this, loc, v);
+  }
+
   mlir::Value createSetBitfield(mlir::Location loc, mlir::Type resultType,
                                 Address dstAddr, mlir::Type storageType,
                                 mlir::Value src, const CIRGenBitFieldInfo &info,
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index dd357ce..89f4926 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -478,8 +478,7 @@ void CIRGenFunction::getVTablePointers(BaseSubobject base,
 
   for (const auto &nextBase : rd->bases()) {
     const auto *baseDecl =
-        cast<CXXRecordDecl>(
-            nextBase.getType()->castAs<RecordType>()->getOriginalDecl())
+        cast<CXXRecordDecl>(nextBase.getType()->castAs<RecordType>()->getDecl())
             ->getDefinitionOrSelf();
 
     // Ignore classes without a vtable.
@@ -1025,7 +1024,7 @@ void CIRGenFunction::enterDtorCleanups(const CXXDestructorDecl *dd,
 
     // Anonymous union members do not have their destructors called.
     const RecordType *rt = type->getAsUnionType();
-    if (rt && rt->getOriginalDecl()->isAnonymousStructOrUnion())
+    if (rt && rt->getDecl()->isAnonymousStructOrUnion())
       continue;
 
     CleanupKind cleanupKind = getCleanupKind(dtorKind);
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index 039d290..4a19d91 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -44,38 +44,70 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d,
 
   // If the type is variably-modified, emit all the VLA sizes for it.
   if (ty->isVariablyModifiedType())
-    cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: variably modified type");
+    emitVariablyModifiedType(ty);
 
   assert(!cir::MissingFeatures::openMP());
 
   Address address = Address::invalid();
-  if (!ty->isConstantSizeType())
-    cgm.errorNYI(d.getSourceRange(), "emitAutoVarDecl: non-constant size type");
-
-  // A normal fixed sized variable becomes an alloca in the entry block,
-  // unless:
-  // - it's an NRVO variable.
-  // - we are compiling OpenMP and it's an OpenMP local variable.
-  if (nrvo) {
-    // The named return value optimization: allocate this variable in the
-    // return slot, so that we can elide the copy when returning this
-    // variable (C++0x [class.copy]p34).
-    address = returnValue;
-
-    if (const RecordDecl *rd = ty->getAsRecordDecl()) {
-      if (const auto *cxxrd = dyn_cast<CXXRecordDecl>(rd);
-          (cxxrd && !cxxrd->hasTrivialDestructor()) ||
-          rd->isNonTrivialToPrimitiveDestroy())
-        cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: set NRVO flag");
+  if (ty->isConstantSizeType()) {
+    // A normal fixed sized variable becomes an alloca in the entry block,
+    // unless:
+    // - it's an NRVO variable.
+    // - we are compiling OpenMP and it's an OpenMP local variable.
+    if (nrvo) {
+      // The named return value optimization: allocate this variable in the
+      // return slot, so that we can elide the copy when returning this
+      // variable (C++0x [class.copy]p34).
+      address = returnValue;
+
+      if (const RecordDecl *rd = ty->getAsRecordDecl()) {
+        if (const auto *cxxrd = dyn_cast<CXXRecordDecl>(rd);
+            (cxxrd && !cxxrd->hasTrivialDestructor()) ||
+            rd->isNonTrivialToPrimitiveDestroy())
+          cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: set NRVO flag");
+      }
+    } else {
+      // A normal fixed sized variable becomes an alloca in the entry block,
+      mlir::Type allocaTy = convertTypeForMem(ty);
+      // Create the temp alloca and declare variable using it.
+      address = createTempAlloca(allocaTy, alignment, loc, d.getName(),
+                                 /*arraySize=*/nullptr, /*alloca=*/nullptr, ip);
+      declare(address.getPointer(), &d, ty, getLoc(d.getSourceRange()),
+              alignment);
     }
   } else {
-    // A normal fixed sized variable becomes an alloca in the entry block,
-    mlir::Type allocaTy = convertTypeForMem(ty);
-    // Create the temp alloca and declare variable using it.
-    address = createTempAlloca(allocaTy, alignment, loc, d.getName(),
-                               /*arraySize=*/nullptr, /*alloca=*/nullptr, ip);
-    declare(address.getPointer(), &d, ty, getLoc(d.getSourceRange()),
-            alignment);
+    // Non-constant size type
+    assert(!cir::MissingFeatures::openMP());
+    if (!didCallStackSave) {
+      // Save the stack.
+      cir::PointerType defaultTy = AllocaInt8PtrTy;
+      CharUnits align = CharUnits::fromQuantity(
+          cgm.getDataLayout().getAlignment(defaultTy, false));
+      Address stack = createTempAlloca(defaultTy, align, loc, "saved_stack");
+
+      mlir::Value v = builder.createStackSave(loc, defaultTy);
+      assert(v.getType() == AllocaInt8PtrTy);
+      builder.createStore(loc, v, stack);
+
+      didCallStackSave = true;
+
+      // Push a cleanup block and restore the stack there.
+      // FIXME: in general circumstances, this should be an EH cleanup.
+      pushStackRestore(NormalCleanup, stack);
+    }
+
+    VlaSizePair vlaSize = getVLASize(ty);
+    mlir::Type memTy = convertTypeForMem(vlaSize.type);
+
+    // Allocate memory for the array.
+    address =
+        createTempAlloca(memTy, alignment, loc, d.getName(), vlaSize.numElts,
+                         /*alloca=*/nullptr, builder.saveInsertionPoint());
+
+    // If we have debug info enabled, properly describe the VLA dimensions for
+    // this type by registering the vla size expression for each of the
+    // dimensions.
+    assert(!cir::MissingFeatures::generateDebugInfo());
   }
 
   emission.addr = address;
@@ -696,6 +728,16 @@ struct DestroyObject final : EHScopeStack::Cleanup {
     cgf.emitDestroy(addr, type, destroyer);
   }
 };
+
+struct CallStackRestore final : EHScopeStack::Cleanup {
+  Address stack;
+  CallStackRestore(Address stack) : stack(stack) {}
+  void emit(CIRGenFunction &cgf) override {
+    mlir::Location loc = stack.getPointer().getLoc();
+    mlir::Value v = cgf.getBuilder().createLoad(loc, stack);
+    cgf.getBuilder().createStackRestore(loc, v);
+  }
+};
 } // namespace
 
 void CIRGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr,
@@ -805,6 +847,10 @@ CIRGenFunction::getDestroyer(QualType::DestructionKind kind) {
   llvm_unreachable("Unknown DestructionKind");
 }
 
+void CIRGenFunction::pushStackRestore(CleanupKind kind, Address spMem) {
+  ehStack.pushCleanup<CallStackRestore>(kind, spMem);
+}
+
 /// Enter a destroy cleanup for the given local variable.
 void CIRGenFunction::emitAutoVarTypeCleanup(
     const CIRGenFunction::AutoVarEmission &emission,
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index f416571..4897c29 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -2068,7 +2068,7 @@ mlir::Value CIRGenFunction::emitAlloca(StringRef name, mlir::Type ty,
     mlir::OpBuilder::InsertionGuard guard(builder);
     builder.restoreInsertionPoint(ip);
     addr = builder.createAlloca(loc, /*addr type*/ localVarPtrTy,
-                                /*var type*/ ty, name, alignIntAttr);
+                                /*var type*/ ty, name, alignIntAttr, arraySize);
     assert(!cir::MissingFeatures::astVarDeclInterface());
   }
   return addr;
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 89e9ec4..81e5fe2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -614,7 +614,7 @@ bool ConstRecordBuilder::applyZeroInitPadding(const ASTRecordLayout &layout,
 bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
   RecordDecl *rd = ile->getType()
                        ->castAs<clang::RecordType>()
-                       ->getOriginalDecl()
+                       ->getDecl()
                        ->getDefinitionOrSelf();
   const ASTRecordLayout &layout = cgm.getASTContext().getASTRecordLayout(rd);
 
@@ -817,9 +817,8 @@ bool ConstRecordBuilder::build(const APValue &val, const RecordDecl *rd,
 
 mlir::Attribute ConstRecordBuilder::finalize(QualType type) {
   type = type.getNonReferenceType();
-  RecordDecl *rd = type->castAs<clang::RecordType>()
-                       ->getOriginalDecl()
-                       ->getDefinitionOrSelf();
+  RecordDecl *rd =
+      type->castAs<clang::RecordType>()->getDecl()->getDefinitionOrSelf();
   mlir::Type valTy = cgm.convertType(type);
   return builder.build(valTy, rd->hasFlexibleArrayMember());
 }
@@ -842,9 +841,8 @@ mlir::Attribute ConstRecordBuilder::buildRecord(ConstantEmitter &emitter,
   ConstantAggregateBuilder constant(emitter.cgm);
   ConstRecordBuilder builder(emitter, constant, CharUnits::Zero());
 
-  const RecordDecl *rd = valTy->castAs<clang::RecordType>()
-                             ->getOriginalDecl()
-                             ->getDefinitionOrSelf();
+  const RecordDecl *rd =
+      valTy->castAs<clang::RecordType>()->getDecl()->getDefinitionOrSelf();
   const CXXRecordDecl *cd = dyn_cast<CXXRecordDecl>(rd);
   if (!builder.build(val, rd, false, cd, CharUnits::Zero()))
     return nullptr;
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 01a43a99..ba36cbe 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -410,6 +410,8 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType,
   curFn = fn;
 
   const Decl *d = gd.getDecl();
+
+  didCallStackSave = false;
   curCodeDecl = d;
   const auto *fd = dyn_cast_or_null<FunctionDecl>(d);
   curFuncDecl = d->getNonClosureContext();
@@ -1006,6 +1008,41 @@ mlir::Value CIRGenFunction::emitAlignmentAssumption(
                                  offsetValue);
 }
 
+CIRGenFunction::VlaSizePair CIRGenFunction::getVLASize(QualType type) {
+  const VariableArrayType *vla =
+      cgm.getASTContext().getAsVariableArrayType(type);
+  assert(vla && "type was not a variable array type!");
+  return getVLASize(vla);
+}
+
+CIRGenFunction::VlaSizePair
+CIRGenFunction::getVLASize(const VariableArrayType *type) {
+  // The number of elements so far; always size_t.
+  mlir::Value numElements;
+
+  QualType elementType;
+  do {
+    elementType = type->getElementType();
+    mlir::Value vlaSize = vlaSizeMap[type->getSizeExpr()];
+    assert(vlaSize && "no size for VLA!");
+    assert(vlaSize.getType() == SizeTy);
+
+    if (!numElements) {
+      numElements = vlaSize;
+    } else {
+      // It's undefined behavior if this wraps around, so mark it that way.
+      // FIXME: Teach -fsanitize=undefined to trap this.
+
+      numElements =
+          builder.createMul(numElements.getLoc(), numElements, vlaSize,
+                            cir::OverflowBehavior::NoUnsignedWrap);
+    }
+  } while ((type = getContext().getAsVariableArrayType(elementType)));
+
+  assert(numElements && "Undefined elements number");
+  return {numElements, elementType};
+}
+
 // TODO(cir): Most of this function can be shared between CIRGen
 // and traditional LLVM codegen
 void CIRGenFunction::emitVariablyModifiedType(QualType type) {
@@ -1086,7 +1123,26 @@ void CIRGenFunction::emitVariablyModifiedType(QualType type) {
       break;
 
     case Type::VariableArray: {
-      cgm.errorNYI("CIRGenFunction::emitVariablyModifiedType VLA");
+      // Losing element qualification here is fine.
+      const VariableArrayType *vat = cast<clang::VariableArrayType>(ty);
+
+      // Unknown size indication requires no size computation.
+      // Otherwise, evaluate and record it.
+      if (const Expr *sizeExpr = vat->getSizeExpr()) {
+        // It's possible that we might have emitted this already,
+        // e.g. with a typedef and a pointer to it.
+        mlir::Value &entry = vlaSizeMap[sizeExpr];
+        if (!entry) {
+          mlir::Value size = emitScalarExpr(sizeExpr);
+          assert(!cir::MissingFeatures::sanitizers());
+
+          // Always zexting here would be wrong if it weren't
+          // undefined behavior to have a negative bound.
+          // FIXME: What about when size's type is larger than size_t?
+          entry = builder.createIntCast(size, SizeTy);
+        }
+      }
+      type = vat->getElementType();
       break;
     }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index d71de2f..0d64c31 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -149,6 +149,10 @@ public:
   using SymTableTy = llvm::ScopedHashTable<const clang::Decl *, mlir::Value>;
   SymTableTy symbolTable;
 
+  /// Whether a cir.stacksave operation has been added. Used to avoid
+  /// inserting cir.stacksave for multiple VLAs in the same scope.
+  bool didCallStackSave = false;
+
   /// Whether or not a Microsoft-style asm block has been processed within
   /// this fuction. These can potentially set the return value.
   bool sawAsmBlock = false;
@@ -188,6 +192,14 @@ public:
   llvm::DenseMap<const OpaqueValueExpr *, LValue> opaqueLValues;
   llvm::DenseMap<const OpaqueValueExpr *, RValue> opaqueRValues;
 
+  // This keeps track of the associated size for each VLA type.
+  // We track this by the size expression rather than the type itself because
+  // in certain situations, like a const qualifier applied to an VLA typedef,
+  // multiple VLA types can share the same size expression.
+  // FIXME: Maybe this could be a stack of maps that is pushed/popped as we
+  // enter/leave scopes.
+  llvm::DenseMap<const Expr *, mlir::Value> vlaSizeMap;
+
 public:
   /// A non-RAII class containing all the information about a bound
   /// opaque value.  OpaqueValueMapping, below, is a RAII wrapper for
@@ -436,6 +448,20 @@ public:
     }
   };
 
+  struct VlaSizePair {
+    mlir::Value numElts;
+    QualType type;
+
+    VlaSizePair(mlir::Value num, QualType ty) : numElts(num), type(ty) {}
+  };
+
+  /// Returns an MLIR::Value+QualType pair that corresponds to the size,
+  /// in non-variably-sized elements, of a variable length array type,
+  /// plus that largest non-variably-sized element type.  Assumes that
+  /// the type has already been emitted with emitVariablyModifiedType.
+  VlaSizePair getVLASize(const VariableArrayType *type);
+  VlaSizePair getVLASize(QualType type);
+
   void finishFunction(SourceLocation endLoc);
 
   /// Determine whether the given initializer is trivial in the sense
@@ -583,6 +609,8 @@ public:
     return needsEHCleanup(kind) ? NormalAndEHCleanup : NormalCleanup;
   }
 
+  void pushStackRestore(CleanupKind kind, Address spMem);
+
   /// Set the address of a local variable.
   void setAddrOfLocalVar(const clang::VarDecl *vd, Address addr) {
     assert(!localDeclMap.count(vd) && "Decl already exists in LocalDeclMap!");
@@ -854,6 +882,7 @@ public:
 
   protected:
     bool performCleanup;
+    bool oldDidCallStackSave;
 
   private:
     RunCleanupsScope(const RunCleanupsScope &) = delete;
@@ -867,6 +896,8 @@ public:
     explicit RunCleanupsScope(CIRGenFunction &cgf)
         : performCleanup(true), cgf(cgf) {
       cleanupStackDepth = cgf.ehStack.stable_begin();
+      oldDidCallStackSave = cgf.didCallStackSave;
+      cgf.didCallStackSave = false;
       oldCleanupStackDepth = cgf.currentCleanupStackDepth;
       cgf.currentCleanupStackDepth = cleanupStackDepth;
     }
@@ -883,6 +914,7 @@ public:
       assert(performCleanup && "Already forced cleanup");
       {
         mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder());
+        cgf.didCallStackSave = oldDidCallStackSave;
         cgf.popCleanupBlocks(cleanupStackDepth);
         performCleanup = false;
         cgf.currentCleanupStackDepth = oldCleanupStackDepth;
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index d30c975..1b85a53 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -744,8 +744,8 @@ static bool shouldUseExternalRttiDescriptor(CIRGenModule &cgm, QualType ty) {
     return false;
 
   if (const auto *recordTy = dyn_cast<RecordType>(ty)) {
-    const CXXRecordDecl *rd =
-        cast<CXXRecordDecl>(recordTy->getOriginalDecl())->getDefinitionOrSelf();
+    const auto *rd =
+        cast<CXXRecordDecl>(recordTy->getDecl())->getDefinitionOrSelf();
     if (!rd->hasDefinition())
       return false;
 
@@ -859,9 +859,7 @@ static bool canUseSingleInheritance(const CXXRecordDecl *rd) {
 
 /// IsIncompleteClassType - Returns whether the given record type is incomplete.
 static bool isIncompleteClassType(const RecordType *recordTy) {
-  return !recordTy->getOriginalDecl()
-              ->getDefinitionOrSelf()
-              ->isCompleteDefinition();
+  return !recordTy->getDecl()->getDefinitionOrSelf()->isCompleteDefinition();
 }
 
 /// Returns whether the given type contains an
@@ -939,8 +937,7 @@ const char *vTableClassNameForType(const CIRGenModule &cgm, const Type *ty) {
   case Type::Atomic:
   // FIXME: GCC treats block pointers as fundamental types?!
   case Type::BlockPointer:
-    cgm.errorNYI("VTableClassNameForType: __fundamental_type_info");
-    break;
+    return "_ZTVN10__cxxabiv123__fundamental_type_infoE";
   case Type::ConstantArray:
   case Type::IncompleteArray:
   case Type::VariableArray:
@@ -957,9 +954,8 @@ const char *vTableClassNameForType(const CIRGenModule &cgm, const Type *ty) {
     break;
 
   case Type::Record: {
-    const CXXRecordDecl *rd =
-        cast<CXXRecordDecl>(cast<RecordType>(ty)->getOriginalDecl())
-            ->getDefinitionOrSelf();
+    const auto *rd = cast<CXXRecordDecl>(cast<RecordType>(ty)->getDecl())
+                         ->getDefinitionOrSelf();
 
     if (!rd->hasDefinition() || !rd->getNumBases()) {
       return classTypeInfo;
@@ -1031,8 +1027,8 @@ static cir::GlobalLinkageKind getTypeInfoLinkage(CIRGenModule &cgm,
       return cir::GlobalLinkageKind::LinkOnceODRLinkage;
 
     if (const RecordType *record = dyn_cast<RecordType>(ty)) {
-      const CXXRecordDecl *rd =
-          cast<CXXRecordDecl>(record->getOriginalDecl())->getDefinitionOrSelf();
+      const auto *rd =
+          cast<CXXRecordDecl>(record->getDecl())->getDefinitionOrSelf();
       if (rd->hasAttr<WeakAttr>())
         return cir::GlobalLinkageKind::WeakODRLinkage;
 
@@ -1382,9 +1378,8 @@ mlir::Attribute CIRGenItaniumRTTIBuilder::buildTypeInfo(
     break;
 
   case Type::Record: {
-    const auto *rd =
-        cast<CXXRecordDecl>(cast<RecordType>(ty)->getOriginalDecl())
-            ->getDefinitionOrSelf();
+    const auto *rd = cast<CXXRecordDecl>(cast<RecordType>(ty)->getDecl())
+                         ->getDefinitionOrSelf();
     if (!rd->hasDefinition() || !rd->getNumBases()) {
       // We don't need to emit any fields.
       break;
@@ -1651,8 +1646,7 @@ void CIRGenItaniumCXXABI::emitThrow(CIRGenFunction &cgf,
   // Lowering pass to skip passing the trivial function.
   //
   if (const RecordType *recordTy = clangThrowType->getAs<RecordType>()) {
-    CXXRecordDecl *rec =
-        cast<CXXRecordDecl>(recordTy->getOriginalDecl()->getDefinition());
+    auto *rec = cast<CXXRecordDecl>(recordTy->getDecl()->getDefinition());
     assert(!cir::MissingFeatures::isTrivialCtorOrDtor());
     if (!rec->hasTrivialDestructor()) {
       cgm.errorNYI("emitThrow: non-trivial destructor");
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 82b1051..57c7a44 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -88,6 +88,8 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
   FP80Ty = cir::FP80Type::get(&getMLIRContext());
   FP128Ty = cir::FP128Type::get(&getMLIRContext());
 
+  AllocaInt8PtrTy = cir::PointerType::get(UInt8Ty, cirAllocaAddressSpace);
+
   PointerAlignInBytes =
       astContext
           .toCharUnitsFromBits(
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index ce14aa8..f638d39 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -398,6 +398,7 @@ void OpenACCRecipeBuilderBase::createRecipeDestroySection(
     emitDestroy(block->getArgument(1), elementTy);
   }
 
+  ls.forceCleanup();
   mlir::acc::YieldOp::create(builder, locEnd);
 }
 void OpenACCRecipeBuilderBase::makeBoundsInit(
@@ -480,6 +481,7 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
                      /*isInitSection=*/true);
   }
 
+  ls.forceCleanup();
   mlir::acc::YieldOp::create(builder, locEnd);
 }
 
@@ -518,6 +520,7 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
   cgf.emitAutoVarInit(tempDeclEmission);
 
   builder.setInsertionPointToEnd(&copyRegion.back());
+  ls.forceCleanup();
   mlir::acc::YieldOp::create(builder, locEnd);
 }
 
@@ -662,6 +665,7 @@ void OpenACCRecipeBuilderBase::createReductionRecipeCombiner(
   }
 
   builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
+  ls.forceCleanup();
   mlir::acc::YieldOp::create(builder, locEnd, block->getArgument(0));
 }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h
index 273ec7f..b5612d9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h
@@ -65,6 +65,9 @@ struct CIRGenTypeCache {
   cir::PointerType VoidPtrTy;
   cir::PointerType UInt8PtrTy;
 
+  /// void* in alloca address space
+  cir::PointerType AllocaInt8PtrTy;
+
   /// The size and alignment of a pointer into the generic address space.
   union {
     unsigned char PointerAlignInBytes;
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 2ab1ea0c..d1b91d0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -159,7 +159,7 @@ isSafeToConvert(const RecordDecl *rd, CIRGenTypes &cgt,
     for (const clang::CXXBaseSpecifier &i : crd->bases())
       if (!isSafeToConvert(i.getType()
                                ->castAs<RecordType>()
-                               ->getOriginalDecl()
+                               ->getDecl()
                                ->getDefinitionOrSelf(),
                            cgt, alreadyChecked))
         return false;
@@ -279,8 +279,7 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
 
   // Process record types before the type cache lookup.
   if (const auto *recordType = dyn_cast<RecordType>(type))
-    return convertRecordDeclType(
-        recordType->getOriginalDecl()->getDefinitionOrSelf());
+    return convertRecordDeclType(recordType->getDecl()->getDefinitionOrSelf());
 
   // Has the type already been processed?
   TypeCacheTy::iterator tci = typeCache.find(ty);
@@ -421,6 +420,16 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
     break;
   }
 
+  case Type::VariableArray: {
+    const VariableArrayType *a = cast<VariableArrayType>(ty);
+    if (a->getIndexTypeCVRQualifiers() != 0)
+      cgm.errorNYI(SourceLocation(), "non trivial array types", type);
+    // VLAs resolve to the innermost element type; this matches
+    // the return of alloca, and there isn't any obviously better choice.
+    resultType = convertTypeForMem(a->getElementType());
+    break;
+  }
+
   case Type::IncompleteArray: {
     const IncompleteArrayType *arrTy = cast<IncompleteArrayType>(ty);
     if (arrTy->getIndexTypeCVRQualifiers() != 0)
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3abba3d..e61b65f 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1793,12 +1793,20 @@ CIRToLLVMGlobalOpLowering::getComdatAttr(cir::GlobalOp &op,
   if (!comdatOp) {
     builder.setInsertionPointToStart(module.getBody());
     comdatOp =
-        builder.create<mlir::LLVM::ComdatOp>(module.getLoc(), comdatName);
+        mlir::LLVM::ComdatOp::create(builder, module.getLoc(), comdatName);
+  }
+
+  if (auto comdatSelector = comdatOp.lookupSymbol<mlir::LLVM::ComdatSelectorOp>(
+          op.getSymName())) {
+    return mlir::SymbolRefAttr::get(
+        builder.getContext(), comdatName,
+        mlir::FlatSymbolRefAttr::get(comdatSelector.getSymNameAttr()));
   }
 
   builder.setInsertionPointToStart(&comdatOp.getBody().back());
-  auto selectorOp = builder.create<mlir::LLVM::ComdatSelectorOp>(
-      comdatOp.getLoc(), op.getSymName(), mlir::LLVM::comdat::Comdat::Any);
+  auto selectorOp = mlir::LLVM::ComdatSelectorOp::create(
+      builder, comdatOp.getLoc(), op.getSymName(),
+      mlir::LLVM::comdat::Comdat::Any);
   return mlir::SymbolRefAttr::get(
       builder.getContext(), comdatName,
       mlir::FlatSymbolRefAttr::get(selectorOp.getSymNameAttr()));
diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp
index 13c837a..1e3ac2e 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.cpp
+++ b/clang/lib/CodeGen/ABIInfoImpl.cpp
@@ -105,7 +105,7 @@ llvm::Type *CodeGen::getVAListElementType(CodeGenFunction &CGF) {
 
 CGCXXABI::RecordArgABI CodeGen::getRecordArgABI(const RecordType *RT,
                                                 CGCXXABI &CXXABI) {
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
   if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
     return CXXABI.getRecordArgABI(CXXRD);
   if (!RD->canPassInRegisters())
@@ -136,7 +136,7 @@ bool CodeGen::classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
 
 QualType CodeGen::useFirstFieldIfTransparentUnion(QualType Ty) {
   if (const RecordType *UT = Ty->getAsUnionType()) {
-    const RecordDecl *UD = UT->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *UD = UT->getDecl()->getDefinitionOrSelf();
     if (UD->hasAttr<TransparentUnionAttr>()) {
       assert(!UD->field_empty() && "sema created an empty transparent union");
       return UD->field_begin()->getType();
@@ -274,7 +274,7 @@ bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD,
   // according to the Itanium ABI.  The exception applies only to records,
   // not arrays of records, so we must also check whether we stripped off an
   // array type above.
-  if (isa<CXXRecordDecl>(RT->getOriginalDecl()) &&
+  if (isa<CXXRecordDecl>(RT->getDecl()) &&
       (WasArray || (!AsIfNoUniqueAddr && !FD->hasAttr<NoUniqueAddressAttr>())))
     return false;
 
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index f8e8086..6020684 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1200,7 +1200,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
       }
     }
 
-    if (shouldEmitUnifiedLTOModueFlag())
+    if (shouldEmitUnifiedLTOModueFlag() &&
+        !TheModule->getModuleFlag("UnifiedLTO"))
       TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
   }
 
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index df28641..741fa44 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1896,7 +1896,7 @@ bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
   // complex destructor or a non-trivially copyable type.
   if (const RecordType *RT =
           ReturnType.getCanonicalType()->getAsCanonical<RecordType>()) {
-    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getOriginalDecl()))
+    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
       return ClassDecl->hasTrivialDestructor();
   }
   return ReturnType.isTriviallyCopyableType(Context);
@@ -3853,7 +3853,7 @@ static void setUsedBits(CodeGenModule &CGM, const RecordType *RTy, int Offset,
                         SmallVectorImpl<uint64_t> &Bits) {
   ASTContext &Context = CGM.getContext();
   int CharWidth = Context.getCharWidth();
-  const RecordDecl *RD = RTy->getOriginalDecl()->getDefinition();
+  const RecordDecl *RD = RTy->getDecl()->getDefinition();
   const ASTRecordLayout &ASTLayout = Context.getASTRecordLayout(RD);
   const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(RD);
 
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index f31f0a2..f782b0c 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2026,7 +2026,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD,
 
     // Anonymous union members do not have their destructors called.
     const RecordType *RT = type->getAsUnionType();
-    if (RT && RT->getOriginalDecl()->isAnonymousStructOrUnion())
+    if (RT && RT->getDecl()->isAnonymousStructOrUnion())
       continue;
 
     CleanupKind cleanupKind = getCleanupKind(dtorKind);
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 9fe9a13..85c70de 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1290,7 +1290,7 @@ static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM,
 static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM,
                                           llvm::DICompileUnit *TheCU) {
   SmallString<256> Identifier;
-  const TagDecl *TD = Ty->getOriginalDecl()->getDefinitionOrSelf();
+  const TagDecl *TD = Ty->getDecl()->getDefinitionOrSelf();
 
   if (!needsTypeIdentifier(TD, CGM, TheCU))
     return Identifier;
@@ -1326,7 +1326,7 @@ static llvm::dwarf::Tag getTagForRecord(const RecordDecl *RD) {
 llvm::DICompositeType *
 CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
                                       llvm::DIScope *Ctx) {
-  const RecordDecl *RD = Ty->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = Ty->getDecl()->getDefinitionOrSelf();
   if (llvm::DIType *T = getTypeOrNull(QualType(Ty, 0)))
     return cast<llvm::DICompositeType>(T);
   llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
@@ -2402,7 +2402,7 @@ void CGDebugInfo::CollectCXXBasesAux(
   for (const auto &BI : Bases) {
     const auto *Base =
         cast<CXXRecordDecl>(
-            BI.getType()->castAsCanonical<RecordType>()->getOriginalDecl())
+            BI.getType()->castAsCanonical<RecordType>()->getDecl())
             ->getDefinition();
     if (!SeenTypes.insert(Base).second)
       continue;
@@ -3077,7 +3077,7 @@ void CGDebugInfo::completeRequiredType(const RecordDecl *RD) {
 }
 
 llvm::DIType *CGDebugInfo::CreateType(const RecordType *Ty) {
-  RecordDecl *RD = Ty->getOriginalDecl()->getDefinitionOrSelf();
+  RecordDecl *RD = Ty->getDecl()->getDefinitionOrSelf();
   llvm::DIType *T = cast_or_null<llvm::DIType>(getTypeOrNull(QualType(Ty, 0)));
   if (T || shouldOmitDefinition(DebugKind, DebugTypeExtRefs, RD,
                                 CGM.getLangOpts())) {
@@ -3105,7 +3105,7 @@ llvm::DIType *CGDebugInfo::GetPreferredNameType(const CXXRecordDecl *RD,
 
 std::pair<llvm::DIType *, llvm::DIType *>
 CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) {
-  RecordDecl *RD = Ty->getOriginalDecl()->getDefinitionOrSelf();
+  RecordDecl *RD = Ty->getDecl()->getDefinitionOrSelf();
 
   // Get overall information about the record type for the debug info.
   llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
@@ -3748,7 +3748,7 @@ llvm::DIType *CGDebugInfo::CreateType(const HLSLInlineSpirvType *Ty,
 
 static auto getEnumInfo(CodeGenModule &CGM, llvm::DICompileUnit *TheCU,
                         const EnumType *Ty) {
-  const EnumDecl *ED = Ty->getOriginalDecl()->getDefinitionOrSelf();
+  const EnumDecl *ED = Ty->getDecl()->getDefinitionOrSelf();
 
   uint64_t Size = 0;
   uint32_t Align = 0;
@@ -4151,7 +4151,7 @@ CGDebugInfo::getOrCreateLimitedType(const RecordType *Ty) {
 
 // TODO: Currently used for context chains when limiting debug info.
 llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
-  RecordDecl *RD = Ty->getOriginalDecl()->getDefinitionOrSelf();
+  RecordDecl *RD = Ty->getDecl()->getDefinitionOrSelf();
 
   // Get overall information about the record type for the debug info.
   StringRef RDName = getClassName(RD);
@@ -4238,8 +4238,7 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
     break;
   }
 
-  if (auto *CTSD =
-          dyn_cast<ClassTemplateSpecializationDecl>(Ty->getOriginalDecl())) {
+  if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(Ty->getDecl())) {
     CXXRecordDecl *TemplateDecl =
         CTSD->getSpecializedTemplate()->getTemplatedDecl();
     RegionMap[TemplateDecl].reset(RealDecl);
@@ -5141,7 +5140,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
   } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
     // If VD is an anonymous union then Storage represents value for
     // all union fields.
-    const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
     if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
       // GDB has trouble finding local variables in anonymous unions, so we emit
       // artificial local variables for each of the members.
@@ -5691,9 +5690,8 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls(
     // Ignore unnamed fields, but recurse into anonymous records.
     if (FieldName.empty()) {
       if (const auto *RT = dyn_cast<RecordType>(Field->getType()))
-        GVE =
-            CollectAnonRecordDecls(RT->getOriginalDecl()->getDefinitionOrSelf(),
-                                   Unit, LineNo, LinkageName, Var, DContext);
+        GVE = CollectAnonRecordDecls(RT->getDecl()->getDefinitionOrSelf(), Unit,
+                                     LineNo, LinkageName, Var, DContext);
       continue;
     }
     // Use VarDecl's Tag, Scope and Line number.
@@ -5712,7 +5710,7 @@ static bool ReferencesAnonymousEntity(RecordType *RT) {
   // But so long as it's not one of those, it doesn't matter if some sub-type
   // of the record (a template parameter) can't be reconstituted - because the
   // un-reconstitutable type itself will carry its own name.
-  const auto *RD = dyn_cast<CXXRecordDecl>(RT->getOriginalDecl());
+  const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
   if (!RD)
     return false;
   if (!RD->getIdentifier())
@@ -5774,7 +5772,7 @@ struct ReconstitutableType : public RecursiveASTVisitor<ReconstitutableType> {
   bool TraverseEnumType(EnumType *ET, bool = false) {
     // Unnamed enums can't be reconstituted due to a lack of column info we
     // produce in the DWARF, so we can't get Clang's full name back.
-    if (const auto *ED = dyn_cast<EnumDecl>(ET->getOriginalDecl())) {
+    if (const auto *ED = dyn_cast<EnumDecl>(ET->getDecl())) {
       if (!ED->getIdentifier()) {
         Reconstitutable = false;
         return false;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e8255b0..8439ec7 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2011,7 +2011,7 @@ static bool isConstantEmittableObjectType(QualType type) {
   // Otherwise, all object types satisfy this except C++ classes with
   // mutable subobjects or non-trivial copy/destroy behavior.
   if (const auto *RT = dyn_cast<RecordType>(type))
-    if (const auto *RD = dyn_cast<CXXRecordDecl>(RT->getOriginalDecl())) {
+    if (const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
       RD = RD->getDefinitionOrSelf();
       if (RD->hasMutableFields() || !RD->isTrivial())
         return false;
@@ -4564,7 +4564,7 @@ static bool IsPreserveAIArrayBase(CodeGenFunction &CGF, const Expr *ArrayBase) {
     const auto *PointeeT = PtrT->getPointeeType()
                              ->getUnqualifiedDesugaredType();
     if (const auto *RecT = dyn_cast<RecordType>(PointeeT))
-      return RecT->getOriginalDecl()
+      return RecT->getDecl()
           ->getMostRecentDecl()
           ->hasAttr<BPFPreserveAccessIndexAttr>();
     return false;
@@ -7008,7 +7008,7 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
         WorkList.emplace_back(LVal, CAT->getElementType(), IdxListCopy);
       }
     } else if (const auto *RT = dyn_cast<RecordType>(T)) {
-      const RecordDecl *Record = RT->getOriginalDecl()->getDefinitionOrSelf();
+      const RecordDecl *Record = RT->getDecl()->getDefinitionOrSelf();
       assert(!Record->isUnion() && "Union types not supported in flat cast.");
 
       const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(Record);
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 07b9aeb..eee397f 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -2080,7 +2080,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
   // referencee.  InitListExprs for unions and arrays can't have references.
   if (const RecordType *RT = E->getType()->getAsCanonical<RecordType>()) {
     if (!RT->isUnionType()) {
-      RecordDecl *SD = RT->getOriginalDecl()->getDefinitionOrSelf();
+      RecordDecl *SD = RT->getDecl()->getDefinitionOrSelf();
       CharUnits NumNonZeroBytes = CharUnits::Zero();
 
       unsigned ILEElement = 0;
@@ -2133,7 +2133,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
     if (const RecordType *RT = CGF.getContext()
                                    .getBaseElementType(E->getType())
                                    ->getAsCanonical<RecordType>()) {
-      const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getOriginalDecl());
+      const auto *RD = cast<CXXRecordDecl>(RT->getDecl());
       if (RD->hasUserDeclaredConstructor())
         return;
     }
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 31ac266..14d8db3 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -1236,8 +1236,8 @@ void CodeGenFunction::EmitNewArrayInitializer(
   if (auto *ILE = dyn_cast<InitListExpr>(Init)) {
     if (const RecordType *RType =
             ILE->getType()->getAsCanonical<RecordType>()) {
-      if (RType->getOriginalDecl()->isStruct()) {
-        const RecordDecl *RD = RType->getOriginalDecl()->getDefinitionOrSelf();
+      if (RType->getDecl()->isStruct()) {
+        const RecordDecl *RD = RType->getDecl()->getDefinitionOrSelf();
         unsigned NumElements = 0;
         if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
           NumElements = CXXRD->getNumBases();
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 715160d..714192d 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3583,7 +3583,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) {
       }
 
       const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(
-          CurrentType->castAsCanonical<RecordType>()->getOriginalDecl());
+          CurrentType->castAsCanonical<RecordType>()->getDecl());
 
       // Save the element type.
       CurrentType = ON.getBase()->getType();
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 603cef9..ecab933 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -519,6 +519,10 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
   if (CGM.getCodeGenOpts().OptimizationLevel == 0)
     Fn->addFnAttr(llvm::Attribute::OptimizeNone);
   Fn->addFnAttr(llvm::Attribute::NoInline);
+
+  if (CGM.getLangOpts().HLSLSpvEnableMaximalReconvergence) {
+    Fn->addFnAttr("enable-maximal-reconvergence", "true");
+  }
 }
 
 static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp
index 2d70e4c..0a383c8 100644
--- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -464,8 +464,7 @@ template <class Derived> struct GenFuncBase {
 
       if (WrongType) {
         std::string FuncName = std::string(F->getName());
-        SourceLocation Loc =
-            QT->castAs<RecordType>()->getOriginalDecl()->getLocation();
+        SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation();
         CGM.Error(Loc, "special function " + FuncName +
                            " for non-trivial C struct has incorrect type");
         return nullptr;
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index dbcce9b..c571821a 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -2495,7 +2495,7 @@ void CGObjCCommonMac::BuildRCBlockVarRecordLayout(const RecordType *RT,
                                                   CharUnits BytePos,
                                                   bool &HasUnion,
                                                   bool ByrefLayout) {
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
   SmallVector<const FieldDecl *, 16> Fields(RD->fields());
   llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(RT, 0));
   const llvm::StructLayout *RecLayout =
@@ -5184,7 +5184,7 @@ CGObjCCommonMac::GetIvarLayoutName(IdentifierInfo *Ident,
 }
 
 void IvarLayoutBuilder::visitRecord(const RecordType *RT, CharUnits offset) {
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
 
   // If this is a union, remember that we had one, because it might mess
   // up the ordering of layout entries.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8d019d4..c5eb14e 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2348,7 +2348,7 @@ static QualType GeneralizeTransparentUnion(QualType Ty) {
   const RecordType *UT = Ty->getAsUnionType();
   if (!UT)
     return Ty;
-  const RecordDecl *UD = UT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *UD = UT->getDecl()->getDefinitionOrSelf();
   if (!UD->hasAttr<TransparentUnionAttr>())
     return Ty;
   for (const auto *it : UD->fields()) {
@@ -4230,7 +4230,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 static bool HasNonDllImportDtor(QualType T) {
   if (const auto *RT =
           T->getBaseElementTypeUnsafe()->getAsCanonical<RecordType>())
-    if (auto *RD = dyn_cast<CXXRecordDecl>(RT->getOriginalDecl())) {
+    if (auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
       RD = RD->getDefinitionOrSelf();
       if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>())
         return true;
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index f8c7d64..4e29d8a 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -310,7 +310,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
       // This also covers anonymous structs and unions, which have a different
       // compatibility rule, but it doesn't matter because you can never have a
       // pointer to an anonymous struct or union.
-      if (!RT->getOriginalDecl()->getDeclName())
+      if (!RT->getDecl()->getDeclName())
         return getAnyPtr(PtrDepth);
 
       // For non-builtin types use the mangled name of the canonical type.
@@ -332,7 +332,7 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
   // Enum types are distinct types. In C++ they have "underlying types",
   // however they aren't related for TBAA.
   if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
-    const EnumDecl *ED = ETy->getOriginalDecl()->getDefinitionOrSelf();
+    const EnumDecl *ED = ETy->getDecl()->getDefinitionOrSelf();
     if (!Features.CPlusPlus)
       return getTypeInfo(ED->getIntegerType());
 
@@ -433,7 +433,7 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset,
           llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag));
       return true;
     }
-    const RecordDecl *RD = TTy->getOriginalDecl()->getDefinition();
+    const RecordDecl *RD = TTy->getDecl()->getDefinition();
     if (RD->hasFlexibleArrayMember())
       return false;
 
@@ -514,7 +514,7 @@ CodeGenTBAA::getTBAAStructInfo(QualType QTy) {
 
 llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
   if (auto *TTy = dyn_cast<RecordType>(Ty)) {
-    const RecordDecl *RD = TTy->getOriginalDecl()->getDefinition();
+    const RecordDecl *RD = TTy->getDecl()->getDefinition();
     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
     using TBAAStructField = llvm::MDBuilder::TBAAStructField;
     SmallVector<TBAAStructField, 4> Fields;
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 3ffe999..ea31195 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -373,8 +373,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
   }
 
   // RecordTypes are cached and processed specially.
-  if (const RecordType *RT = dyn_cast<RecordType>(Ty))
-    return ConvertRecordDeclType(RT->getOriginalDecl()->getDefinitionOrSelf());
+  if (const auto *RT = dyn_cast<RecordType>(Ty))
+    return ConvertRecordDeclType(RT->getDecl()->getDefinitionOrSelf());
 
   llvm::Type *CachedType = nullptr;
   auto TCI = TypeCache.find(Ty);
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 7dc2eaf..9e195a9 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -3816,7 +3816,7 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
 
   if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
     const CXXRecordDecl *RD =
-        cast<CXXRecordDecl>(RecordTy->getOriginalDecl())->getDefinitionOrSelf();
+        cast<CXXRecordDecl>(RecordTy->getDecl())->getDefinitionOrSelf();
     if (!RD->hasDefinition())
       return false;
 
@@ -3850,9 +3850,7 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
 
 /// IsIncompleteClassType - Returns whether the given record type is incomplete.
 static bool IsIncompleteClassType(const RecordType *RecordTy) {
-  return !RecordTy->getOriginalDecl()
-              ->getDefinitionOrSelf()
-              ->isCompleteDefinition();
+  return !RecordTy->getDecl()->getDefinitionOrSelf()->isCompleteDefinition();
 }
 
 /// ContainsIncompleteClassType - Returns whether the given type contains an
@@ -3985,9 +3983,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty,
     break;
 
   case Type::Record: {
-    const CXXRecordDecl *RD =
-        cast<CXXRecordDecl>(cast<RecordType>(Ty)->getOriginalDecl())
-            ->getDefinitionOrSelf();
+    const auto *RD = cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl())
+                         ->getDefinitionOrSelf();
 
     if (!RD->hasDefinition() || !RD->getNumBases()) {
       VTableName = ClassTypeInfo;
@@ -4109,8 +4106,8 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
       return llvm::GlobalValue::LinkOnceODRLinkage;
 
     if (const RecordType *Record = dyn_cast<RecordType>(Ty)) {
-      const CXXRecordDecl *RD =
-          cast<CXXRecordDecl>(Record->getOriginalDecl())->getDefinitionOrSelf();
+      const auto *RD =
+          cast<CXXRecordDecl>(Record->getDecl())->getDefinitionOrSelf();
       if (RD->hasAttr<WeakAttr>())
         return llvm::GlobalValue::WeakODRLinkage;
       if (CGM.getTriple().isWindowsItaniumEnvironment())
@@ -4273,9 +4270,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
     break;
 
   case Type::Record: {
-    const CXXRecordDecl *RD =
-        cast<CXXRecordDecl>(cast<RecordType>(Ty)->getOriginalDecl())
-            ->getDefinitionOrSelf();
+    const auto *RD = cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl())
+                         ->getDefinitionOrSelf();
     if (!RD->hasDefinition() || !RD->getNumBases()) {
       // We don't need to emit any fields.
       break;
@@ -4322,8 +4318,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
   if (CGM.getTarget().hasPS4DLLImportExport() &&
       GVDLLStorageClass != llvm::GlobalVariable::DLLExportStorageClass) {
     if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
-      const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getOriginalDecl())
-                                    ->getDefinitionOrSelf();
+      const auto *RD =
+          cast<CXXRecordDecl>(RecordTy->getDecl())->getDefinitionOrSelf();
       if (RD->hasAttr<DLLExportAttr>() ||
           CXXRecordNonInlineHasAttr<DLLExportAttr>(RD))
         GVDLLStorageClass = llvm::GlobalVariable::DLLExportStorageClass;
diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp
index 4d894fd..20965430 100644
--- a/clang/lib/CodeGen/SwiftCallingConv.cpp
+++ b/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -66,7 +66,7 @@ void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) {
 
   // Record types.
   if (auto recType = type->getAsCanonical<RecordType>()) {
-    addTypedData(recType->getOriginalDecl(), begin);
+    addTypedData(recType->getDecl(), begin);
 
     // Array types.
   } else if (type->isArrayType()) {
@@ -814,7 +814,7 @@ static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type,
                                bool forReturn) {
   unsigned IndirectAS = CGM.getDataLayout().getAllocaAddrSpace();
   if (auto recordType = dyn_cast<RecordType>(type)) {
-    auto record = recordType->getOriginalDecl();
+    auto record = recordType->getDecl();
     auto &layout = CGM.getContext().getASTRecordLayout(record);
 
     if (mustPassRecordIndirectly(CGM, record))
@@ -822,8 +822,7 @@ static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type,
                                      /*AddrSpace=*/IndirectAS, /*byval=*/false);
 
     SwiftAggLowering lowering(CGM);
-    lowering.addTypedData(recordType->getOriginalDecl(), CharUnits::Zero(),
-                          layout);
+    lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout);
     lowering.finish();
 
     return classifyExpandedType(lowering, forReturn, layout.getAlignment(),
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index d7deece..bb41a14 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -743,7 +743,7 @@ bool AArch64ABIInfo::passAsPureScalableType(
       return false;
 
     // Pure scalable types are never unions and never contain unions.
-    const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
     if (RD->isUnion())
       return false;
 
diff --git a/clang/lib/CodeGen/Targets/ARC.cpp b/clang/lib/CodeGen/Targets/ARC.cpp
index 6727587..6c9444d 100644
--- a/clang/lib/CodeGen/Targets/ARC.cpp
+++ b/clang/lib/CodeGen/Targets/ARC.cpp
@@ -112,8 +112,7 @@ ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
 
   if (isAggregateTypeForABI(Ty)) {
     // Structures with flexible arrays are always indirect.
-    if (RT &&
-        RT->getOriginalDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
+    if (RT && RT->getDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
       return getIndirectByValue(Ty);
 
     // Ignore empty structs/unions.
diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
index c84c9f2..4d05217 100644
--- a/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -516,7 +516,7 @@ static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
   if (!RT) return false;
 
   // Ignore records with flexible arrays.
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
   if (RD->hasFlexibleArrayMember())
     return false;
 
diff --git a/clang/lib/CodeGen/Targets/Lanai.cpp b/clang/lib/CodeGen/Targets/Lanai.cpp
index e76431a..871a135 100644
--- a/clang/lib/CodeGen/Targets/Lanai.cpp
+++ b/clang/lib/CodeGen/Targets/Lanai.cpp
@@ -102,8 +102,7 @@ ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
 
   if (isAggregateTypeForABI(Ty)) {
     // Structures with flexible arrays are always indirect.
-    if (RT &&
-        RT->getOriginalDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
+    if (RT && RT->getDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
       return getIndirectResult(Ty, /*ByVal=*/true, State);
 
     // Ignore empty structs/unions.
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 1f344d6..878723d 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -150,7 +150,7 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
     // Non-zero-length arrays of empty records make the struct ineligible to be
     // passed via FARs in C++.
     if (const auto *RTy = EltTy->getAsCanonical<RecordType>()) {
-      if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getOriginalDecl()) &&
+      if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getDecl()) &&
           isEmptyRecord(getContext(), EltTy, true, true))
         return false;
     }
@@ -169,7 +169,7 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
     // copy constructor are not eligible for the FP calling convention.
     if (getRecordArgABI(Ty, CGT.getCXXABI()))
       return false;
-    const RecordDecl *RD = RTy->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RTy->getDecl()->getDefinitionOrSelf();
     if (isEmptyRecord(getContext(), Ty, true, true) &&
         (!RD->isUnion() || !isa<CXXRecordDecl>(RD)))
       return true;
diff --git a/clang/lib/CodeGen/Targets/Mips.cpp b/clang/lib/CodeGen/Targets/Mips.cpp
index f26ab97..22fdcd9 100644
--- a/clang/lib/CodeGen/Targets/Mips.cpp
+++ b/clang/lib/CodeGen/Targets/Mips.cpp
@@ -161,7 +161,7 @@ llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const {
     return llvm::StructType::get(getVMContext(), ArgList);
   }
 
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
   const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
   assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
 
@@ -265,7 +265,7 @@ MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const {
   SmallVector<llvm::Type*, 8> RTList;
 
   if (RT && RT->isStructureOrClassType()) {
-    const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
     unsigned FieldCnt = Layout.getFieldCount();
 
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index 0d0941e..d134589 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -234,7 +234,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
     // Non-zero-length arrays of empty records make the struct ineligible for
     // the FP calling convention in C++.
     if (const auto *RTy = EltTy->getAsCanonical<RecordType>()) {
-      if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getOriginalDecl()) &&
+      if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getDecl()) &&
           isEmptyRecord(getContext(), EltTy, true, true))
         return false;
     }
@@ -256,7 +256,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
       return false;
     if (isEmptyRecord(getContext(), Ty, true, true))
       return true;
-    const RecordDecl *RD = RTy->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RTy->getDecl()->getDefinitionOrSelf();
     // Unions aren't eligible unless they're empty (which is caught above).
     if (RD->isUnion())
       return false;
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index fb78948..8daf8eb 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -795,8 +795,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
   if (isAggregateTypeForABI(Ty)) {
     // Structures with flexible arrays are always indirect.
     // FIXME: This should not be byval!
-    if (RT &&
-        RT->getOriginalDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
+    if (RT && RT->getDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
       return getIndirectResult(Ty, true, State);
 
     // Ignore empty structs/unions on non-Windows.
@@ -831,7 +830,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
       unsigned AlignInBits = 0;
       if (RT) {
         const ASTRecordLayout &Layout =
-            getContext().getASTRecordLayout(RT->getOriginalDecl());
+            getContext().getASTRecordLayout(RT->getDecl());
         AlignInBits = getContext().toBits(Layout.getRequiredAlignment());
       } else if (TI.isAlignRequired()) {
         AlignInBits = TI.Align;
@@ -2042,7 +2041,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
     if (getRecordArgABI(RT, getCXXABI()))
       return;
 
-    const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
 
     // Assume variable sized types are passed in memory.
     if (RD->hasFlexibleArrayMember())
@@ -2851,9 +2850,8 @@ ABIArgInfo
 X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
                                              unsigned &NeededSSE,
                                              unsigned &MaxVectorWidth) const {
-  auto *RD = cast<RecordType>(Ty.getCanonicalType())
-                 ->getOriginalDecl()
-                 ->getDefinitionOrSelf();
+  auto *RD =
+      cast<RecordType>(Ty.getCanonicalType())->getDecl()->getDefinitionOrSelf();
 
   if (RD->hasFlexibleArrayMember())
     return getIndirectReturnResult(Ty);
@@ -3313,7 +3311,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
                                        RAA == CGCXXABI::RAA_DirectInMemory);
     }
 
-    if (RT->getOriginalDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
+    if (RT->getDecl()->getDefinitionOrSelf()->hasFlexibleArrayMember())
       return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                      /*ByVal=*/false);
   }
diff --git a/clang/lib/CodeGen/Targets/XCore.cpp b/clang/lib/CodeGen/Targets/XCore.cpp
index ab01154..f9726ec 100644
--- a/clang/lib/CodeGen/Targets/XCore.cpp
+++ b/clang/lib/CodeGen/Targets/XCore.cpp
@@ -380,7 +380,7 @@ static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
 
   // We collect all encoded fields and order as necessary.
   bool IsRecursive = false;
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinition();
+  const RecordDecl *RD = RT->getDecl()->getDefinition();
   if (RD && !RD->field_empty()) {
     // An incomplete TypeString stub is placed in the cache for this RecordType
     // so that recursive calls to this RecordType will use it whilst building a
@@ -429,7 +429,7 @@ static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
   Enc += "){";
 
   // We collect all encoded enumerations and order them alphanumerically.
-  if (const EnumDecl *ED = ET->getOriginalDecl()->getDefinition()) {
+  if (const EnumDecl *ED = ET->getDecl()->getDefinition()) {
     SmallVector<FieldEncoding, 16> FE;
     for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E;
          ++I) {
diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp
index 880e9e3..715429b 100644
--- a/clang/lib/Driver/Job.cpp
+++ b/clang/lib/Driver/Job.cpp
@@ -57,24 +57,25 @@ static bool skipArgs(const char *Flag, bool HaveCrashVFS, int &SkipNum,
   SkipNum = 2;
   // These flags are all of the form -Flag <Arg> and are treated as two
   // arguments.  Therefore, we need to skip the flag and the next argument.
-  bool ShouldSkip = llvm::StringSwitch<bool>(Flag)
-    .Cases("-MF", "-MT", "-MQ", "-serialize-diagnostic-file", true)
-    .Cases("-o", "-dependency-file", true)
-    .Cases("-fdebug-compilation-dir", "-diagnostic-log-file", true)
-    .Cases("-dwarf-debug-flags", "-ivfsoverlay", true)
-    .Default(false);
+  bool ShouldSkip =
+      llvm::StringSwitch<bool>(Flag)
+          .Cases({"-MF", "-MT", "-MQ", "-serialize-diagnostic-file"}, true)
+          .Cases({"-o", "-dependency-file"}, true)
+          .Cases({"-fdebug-compilation-dir", "-diagnostic-log-file"}, true)
+          .Cases({"-dwarf-debug-flags", "-ivfsoverlay"}, true)
+          .Default(false);
   if (ShouldSkip)
     return true;
 
   // Some include flags shouldn't be skipped if we have a crash VFS
   IsInclude =
       llvm::StringSwitch<bool>(Flag)
-          .Cases("-include", "-header-include-file", true)
-          .Cases("-idirafter", "-internal-isystem", "-iwithprefix", true)
-          .Cases("-internal-externc-isystem", "-iprefix", true)
-          .Cases("-iwithprefixbefore", "-isystem", "-iquote", true)
-          .Cases("-isysroot", "-I", "-F", "-resource-dir", true)
-          .Cases("-internal-iframework", "-iframework", "-include-pch", true)
+          .Cases({"-include", "-header-include-file"}, true)
+          .Cases({"-idirafter", "-internal-isystem", "-iwithprefix"}, true)
+          .Cases({"-internal-externc-isystem", "-iprefix"}, true)
+          .Cases({"-iwithprefixbefore", "-isystem", "-iquote"}, true)
+          .Cases({"-isysroot", "-I", "-F", "-resource-dir"}, true)
+          .Cases({"-internal-iframework", "-iframework", "-include-pch"}, true)
           .Default(false);
   if (IsInclude)
     return !HaveCrashVFS;
@@ -83,9 +84,9 @@ static bool skipArgs(const char *Flag, bool HaveCrashVFS, int &SkipNum,
 
   // These flags are all of the form -Flag and have no second argument.
   ShouldSkip = llvm::StringSwitch<bool>(Flag)
-    .Cases("-M", "-MM", "-MG", "-MP", "-MD", true)
-    .Case("-MMD", true)
-    .Default(false);
+                   .Cases({"-M", "-MM", "-MG", "-MP", "-MD"}, true)
+                   .Case("-MMD", true)
+                   .Default(false);
 
   // Match found.
   SkipNum = 1;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bf75573..a7310ba 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3755,7 +3755,8 @@ static void RenderHLSLOptions(const ArgList &Args, ArgStringList &CmdArgs,
       options::OPT_hlsl_entrypoint,
       options::OPT_fdx_rootsignature_define,
       options::OPT_fdx_rootsignature_version,
-      options::OPT_fhlsl_spv_use_unknown_image_format};
+      options::OPT_fhlsl_spv_use_unknown_image_format,
+      options::OPT_fhlsl_spv_enable_maximal_reconvergence};
   if (!types::isHLSL(InputType))
     return;
   for (const auto &Arg : ForwardedArguments)
@@ -9214,8 +9215,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
                    options::OPT_nogpulibc)) {
     forAllAssociatedToolChains(C, JA, getToolChain(), [&](const ToolChain &TC) {
       // The device C library is only available for NVPTX and AMDGPU targets
-      // currently.
-      if (!TC.getTriple().isNVPTX() && !TC.getTriple().isAMDGPU())
+      // and we only link it by default for OpenMP currently.
+      if ((!TC.getTriple().isNVPTX() && !TC.getTriple().isAMDGPU()) ||
+          !JA.isHostOffloading(Action::OFK_OpenMP))
         return;
       bool HasLibC = TC.getStdlibIncludePath().has_value();
       if (HasLibC) {
diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp
index ceed7cb..0325296 100644
--- a/clang/lib/Driver/XRayArgs.cpp
+++ b/clang/lib/Driver/XRayArgs.cpp
@@ -105,8 +105,9 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
       for (const auto &P : BundleParts) {
         // TODO: Automate the generation of the string case table.
         auto Valid = llvm::StringSwitch<bool>(P)
-                         .Cases("none", "all", "function", "function-entry",
-                                "function-exit", "custom", true)
+                         .Cases({"none", "all", "function", "function-entry",
+                                 "function-exit", "custom"},
+                                true)
                          .Default(false);
 
         if (!Valid) {
diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp
index 541af6d..e5eda46 100644
--- a/clang/lib/ExtractAPI/DeclarationFragments.cpp
+++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp
@@ -422,7 +422,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType(
 
     Fragments.append(getFragmentsForNNS(TagTy->getQualifier(), Context, After));
 
-    const TagDecl *Decl = TagTy->getOriginalDecl();
+    const TagDecl *Decl = TagTy->getDecl();
     // Anonymous decl, skip this fragment.
     if (Decl->getName().empty())
       return Fragments.append("{ ... }",
diff --git a/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp b/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp
index 5adbbc6..41e4e0c 100644
--- a/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp
+++ b/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp
@@ -26,7 +26,7 @@ TypedefUnderlyingTypeResolver::getUnderlyingTypeDecl(QualType Type) const {
   if (TypedefTy)
     TypeDecl = TypedefTy->getDecl();
   if (const TagType *TagTy = Type->getAs<TagType>()) {
-    TypeDecl = TagTy->getOriginalDecl();
+    TypeDecl = TagTy->getDecl();
   } else if (const ObjCInterfaceType *ObjCITy =
                  Type->getAs<ObjCInterfaceType>()) {
     TypeDecl = ObjCITy->getDecl();
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index 54f366f..7348a3a 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -289,17 +289,20 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
                    SmallVector<WhitespaceManager::Change, 16> &Changes) {
   int Shift = 0;
 
-  // ScopeStack keeps track of the current scope depth. It contains indices of
-  // the first token on each scope.
+  // ScopeStack keeps track of the current scope depth. It contains the levels
+  // of at most 2 scopes. The first one is the one that the matched token is
+  // in. The second one is the one that should not be moved by this procedure.
   // The "Matches" indices should only have tokens from the outer-most scope.
   // However, we do need to pay special attention to one class of tokens
-  // that are not in the outer-most scope, and that is function parameters
-  // which are split across multiple lines, as illustrated by this example:
+  // that are not in the outer-most scope, and that is the continuations of an
+  // unwrapped line whose positions are derived from a token to the right of the
+  // aligned token, as illustrated by this example:
   //   double a(int x);
   //   int    b(int  y,
   //          double z);
   // In the above example, we need to take special care to ensure that
-  // 'double z' is indented along with it's owning function 'b'.
+  // 'double z' is indented along with its owning function 'b', because its
+  // position is derived from the '(' token to the right of the 'b' token.
   // The same holds for calling a function:
   //   double a = foo(x);
   //   int    b = bar(foo(y),
@@ -309,32 +312,28 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
   //   auto s   = "Hello"
   //          "World";
   // Special handling is required for 'nested' ternary operators.
-  SmallVector<unsigned, 16> ScopeStack;
+  SmallVector<std::tuple<unsigned, unsigned, unsigned>, 2> ScopeStack;
 
   for (unsigned i = Start; i != End; ++i) {
     auto &CurrentChange = Changes[i];
     if (!Matches.empty() && Matches[0] < i)
       Matches.consume_front();
     assert(Matches.empty() || Matches[0] >= i);
-    if (!ScopeStack.empty() &&
-        CurrentChange.indentAndNestingLevel() <
-            Changes[ScopeStack.back()].indentAndNestingLevel()) {
+    while (!ScopeStack.empty() &&
+           CurrentChange.indentAndNestingLevel() < ScopeStack.back()) {
       ScopeStack.pop_back();
     }
 
-    // Compare current token to previous non-comment token to ensure whether
-    // it is in a deeper scope or not.
-    unsigned PreviousNonComment = i - 1;
-    while (PreviousNonComment > Start &&
-           Changes[PreviousNonComment].Tok->is(tok::comment)) {
-      --PreviousNonComment;
-    }
-    if (i != Start && CurrentChange.indentAndNestingLevel() >
-                          Changes[PreviousNonComment].indentAndNestingLevel()) {
-      ScopeStack.push_back(i);
+    // Keep track of the level that should not move with the aligned token.
+    if (ScopeStack.size() == 1u && CurrentChange.NewlinesBefore != 0u &&
+        CurrentChange.indentAndNestingLevel() > ScopeStack[0] &&
+        !CurrentChange.IsAligned) {
+      ScopeStack.push_back(CurrentChange.indentAndNestingLevel());
     }
 
-    bool InsideNestedScope = !ScopeStack.empty();
+    bool InsideNestedScope =
+        !ScopeStack.empty() &&
+        CurrentChange.indentAndNestingLevel() > ScopeStack[0];
     bool ContinuedStringLiteral = i > Start &&
                                   CurrentChange.Tok->is(tok::string_literal) &&
                                   Changes[i - 1].Tok->is(tok::string_literal);
@@ -349,103 +348,20 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
     if (!Matches.empty() && Matches[0] == i) {
       Shift = Column - (RightJustify ? CurrentChange.TokenLength : 0) -
               CurrentChange.StartOfTokenColumn;
+      ScopeStack = {CurrentChange.indentAndNestingLevel()};
       CurrentChange.Spaces += Shift;
     }
 
     if (Shift == 0)
       continue;
 
-    // This is for function parameters that are split across multiple lines,
-    // as mentioned in the ScopeStack comment.
-    if (InsideNestedScope && CurrentChange.NewlinesBefore > 0) {
-      unsigned ScopeStart = ScopeStack.back();
-      auto ShouldShiftBeAdded = [&] {
-        // Function declaration
-        if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName))
-          return true;
-
-        // Lambda.
-        if (Changes[ScopeStart - 1].Tok->is(TT_LambdaLBrace))
-          return false;
-
-        // Continued function declaration
-        if (ScopeStart > Start + 1 &&
-            Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)) {
-          return true;
-        }
-
-        // Continued (template) function call.
-        if (ScopeStart > Start + 1 &&
-            Changes[ScopeStart - 2].Tok->isOneOf(tok::identifier,
-                                                 TT_TemplateCloser) &&
-            Changes[ScopeStart - 1].Tok->is(tok::l_paren) &&
-            Changes[ScopeStart].Tok->isNot(TT_LambdaLSquare)) {
-          if (CurrentChange.Tok->MatchingParen &&
-              CurrentChange.Tok->MatchingParen->is(TT_LambdaLBrace)) {
-            return false;
-          }
-          if (Changes[ScopeStart].NewlinesBefore > 0)
-            return false;
-          if (CurrentChange.Tok->is(tok::l_brace) &&
-              CurrentChange.Tok->is(BK_BracedInit)) {
-            return true;
-          }
-          return Style.BinPackArguments;
-        }
-
-        // Ternary operator
-        if (CurrentChange.Tok->is(TT_ConditionalExpr))
-          return true;
-
-        // Period Initializer .XXX = 1.
-        if (CurrentChange.Tok->is(TT_DesignatedInitializerPeriod))
-          return true;
-
-        // Continued ternary operator
-        if (CurrentChange.Tok->Previous &&
-            CurrentChange.Tok->Previous->is(TT_ConditionalExpr)) {
-          return true;
-        }
-
-        // Continued direct-list-initialization using braced list.
-        if (ScopeStart > Start + 1 &&
-            Changes[ScopeStart - 2].Tok->is(tok::identifier) &&
-            Changes[ScopeStart - 1].Tok->is(tok::l_brace) &&
-            CurrentChange.Tok->is(tok::l_brace) &&
-            CurrentChange.Tok->is(BK_BracedInit)) {
-          return true;
-        }
-
-        // Continued braced list.
-        if (ScopeStart > Start + 1 &&
-            Changes[ScopeStart - 2].Tok->isNot(tok::identifier) &&
-            Changes[ScopeStart - 1].Tok->is(tok::l_brace) &&
-            CurrentChange.Tok->isNot(tok::r_brace)) {
-          for (unsigned OuterScopeStart : llvm::reverse(ScopeStack)) {
-            // Lambda.
-            if (OuterScopeStart > Start &&
-                Changes[OuterScopeStart - 1].Tok->is(TT_LambdaLBrace)) {
-              return false;
-            }
-          }
-          if (Changes[ScopeStart].NewlinesBefore > 0)
-            return false;
-          return true;
-        }
-
-        // Continued template parameter.
-        if (Changes[ScopeStart - 1].Tok->is(TT_TemplateOpener))
-          return true;
-
-        return false;
-      };
-
-      if (ShouldShiftBeAdded())
-        CurrentChange.Spaces += Shift;
-    }
-
-    if (ContinuedStringLiteral)
+    // This is for lines that are split across multiple lines, as mentioned in
+    // the ScopeStack comment. The stack size being 1 means that the token is
+    // not in a scope that should not move.
+    if (ScopeStack.size() == 1u && CurrentChange.NewlinesBefore > 0 &&
+        (ContinuedStringLiteral || InsideNestedScope)) {
       CurrentChange.Spaces += Shift;
+    }
 
     // We should not remove required spaces unless we break the line before.
     assert(Shift > 0 || Changes[i].NewlinesBefore > 0 ||
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 292adce..5bd15f5 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4012,13 +4012,13 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
           auto Diag = Diags.Report(diag::note_drv_use_standard);
           Diag << Std.getName() << Std.getDescription();
           unsigned NumAliases = 0;
-#define LANGSTANDARD(id, name, lang, desc, features)
+#define LANGSTANDARD(id, name, lang, desc, features, version)
 #define LANGSTANDARD_ALIAS(id, alias) \
           if (KindValue == LangStandard::lang_##id) ++NumAliases;
 #define LANGSTANDARD_ALIAS_DEPR(id, alias)
 #include "clang/Basic/LangStandards.def"
           Diag << NumAliases;
-#define LANGSTANDARD(id, name, lang, desc, features)
+#define LANGSTANDARD(id, name, lang, desc, features, version)
 #define LANGSTANDARD_ALIAS(id, alias) \
           if (KindValue == LangStandard::lang_##id) Diag << alias;
 #define LANGSTANDARD_ALIAS_DEPR(id, alias)
diff --git a/clang/lib/Frontend/FrontendOptions.cpp b/clang/lib/Frontend/FrontendOptions.cpp
index 32ed995..fb178b6 100644
--- a/clang/lib/Frontend/FrontendOptions.cpp
+++ b/clang/lib/Frontend/FrontendOptions.cpp
@@ -14,25 +14,26 @@ using namespace clang;
 
 InputKind FrontendOptions::getInputKindForExtension(StringRef Extension) {
   return llvm::StringSwitch<InputKind>(Extension)
-      .Cases("ast", "pcm", InputKind(Language::Unknown, InputKind::Precompiled))
+      .Cases({"ast", "pcm"},
+             InputKind(Language::Unknown, InputKind::Precompiled))
       .Case("c", Language::C)
-      .Cases("S", "s", Language::Asm)
+      .Cases({"S", "s"}, Language::Asm)
       .Case("i", InputKind(Language::C).getPreprocessed())
       .Case("ii", InputKind(Language::CXX).getPreprocessed())
       .Case("cui", InputKind(Language::CUDA).getPreprocessed())
       .Case("m", Language::ObjC)
       .Case("mi", InputKind(Language::ObjC).getPreprocessed())
-      .Cases("mm", "M", Language::ObjCXX)
+      .Cases({"mm", "M"}, Language::ObjCXX)
       .Case("mii", InputKind(Language::ObjCXX).getPreprocessed())
-      .Cases("C", "cc", "cp", Language::CXX)
-      .Cases("cpp", "CPP", "c++", "cxx", "hpp", "hxx", Language::CXX)
+      .Cases({"C", "cc", "cp"}, Language::CXX)
+      .Cases({"cpp", "CPP", "c++", "cxx", "hpp", "hxx"}, Language::CXX)
       .Case("cppm", Language::CXX)
-      .Cases("iim", "iih", InputKind(Language::CXX).getPreprocessed())
+      .Cases({"iim", "iih"}, InputKind(Language::CXX).getPreprocessed())
       .Case("cl", Language::OpenCL)
       .Case("clcpp", Language::OpenCLCXX)
-      .Cases("cu", "cuh", Language::CUDA)
+      .Cases({"cu", "cuh"}, Language::CUDA)
       .Case("hip", Language::HIP)
-      .Cases("ll", "bc", Language::LLVM_IR)
+      .Cases({"ll", "bc"}, Language::LLVM_IR)
       .Case("hlsl", Language::HLSL)
       .Case("cir", Language::CIR)
       .Default(Language::Unknown);
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b899fb9..baad6317 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -459,43 +459,12 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
   //      value is, are implementation-defined.
   // (Removed in C++20.)
   if (!LangOpts.CPlusPlus) {
-    if (LangOpts.C2y)
-      Builder.defineMacro("__STDC_VERSION__", "202400L");
-    else if (LangOpts.C23)
-      Builder.defineMacro("__STDC_VERSION__", "202311L");
-    else if (LangOpts.C17)
-      Builder.defineMacro("__STDC_VERSION__", "201710L");
-    else if (LangOpts.C11)
-      Builder.defineMacro("__STDC_VERSION__", "201112L");
-    else if (LangOpts.C99)
-      Builder.defineMacro("__STDC_VERSION__", "199901L");
-    else if (!LangOpts.GNUMode && LangOpts.Digraphs)
-      Builder.defineMacro("__STDC_VERSION__", "199409L");
+    if (std::optional<uint32_t> Lang = LangOpts.getCLangStd())
+      Builder.defineMacro("__STDC_VERSION__", Twine(*Lang) + "L");
   } else {
     //   -- __cplusplus
-    if (LangOpts.CPlusPlus26)
-      // FIXME: Use correct value for C++26.
-      Builder.defineMacro("__cplusplus", "202400L");
-    else if (LangOpts.CPlusPlus23)
-      Builder.defineMacro("__cplusplus", "202302L");
-    //      [C++20] The integer literal 202002L.
-    else if (LangOpts.CPlusPlus20)
-      Builder.defineMacro("__cplusplus", "202002L");
-    //      [C++17] The integer literal 201703L.
-    else if (LangOpts.CPlusPlus17)
-      Builder.defineMacro("__cplusplus", "201703L");
-    //      [C++14] The name __cplusplus is defined to the value 201402L when
-    //      compiling a C++ translation unit.
-    else if (LangOpts.CPlusPlus14)
-      Builder.defineMacro("__cplusplus", "201402L");
-    //      [C++11] The name __cplusplus is defined to the value 201103L when
-    //      compiling a C++ translation unit.
-    else if (LangOpts.CPlusPlus11)
-      Builder.defineMacro("__cplusplus", "201103L");
-    //      [C++03] The name __cplusplus is defined to the value 199711L when
-    //      compiling a C++ translation unit.
-    else
-      Builder.defineMacro("__cplusplus", "199711L");
+    Builder.defineMacro("__cplusplus",
+                        Twine(*LangOpts.getCPlusPlusLangStd()) + "L");
 
     //   -- __STDCPP_DEFAULT_NEW_ALIGNMENT__
     //      [C++17] An integer literal of type std::size_t whose value is the
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 42f2d65..dee29fe 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -852,7 +852,7 @@ RewriteModernObjC::getIvarAccessString(ObjCIvarDecl *D) {
     IvarT = GetGroupRecordTypeForObjCIvarBitfield(D);
 
   if (!IvarT->getAs<TypedefType>() && IvarT->isRecordType()) {
-    RecordDecl *RD = IvarT->castAsCanonical<RecordType>()->getOriginalDecl();
+    RecordDecl *RD = IvarT->castAsCanonical<RecordType>()->getDecl();
     RD = RD->getDefinition();
     if (RD && !RD->getDeclName().getAsIdentifierInfo()) {
       // decltype(((Foo_IMPL*)0)->bar) *
@@ -7453,8 +7453,7 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
         IvarT = GetGroupRecordTypeForObjCIvarBitfield(D);
 
       if (!IvarT->getAs<TypedefType>() && IvarT->isRecordType()) {
-        RecordDecl *RD =
-            IvarT->castAsCanonical<RecordType>()->getOriginalDecl();
+        RecordDecl *RD = IvarT->castAsCanonical<RecordType>()->getDecl();
         RD = RD->getDefinition();
         if (RD && !RD->getDeclName().getAsIdentifierInfo()) {
           // decltype(((Foo_IMPL*)0)->bar) *
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index e150aa6..fa7f4c2 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1852,9 +1852,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b)
 ///    control byte specify the index (within the same 128-bit half) of \a __a
 ///    to copy to the result byte.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shuffle_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_shuffle_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
 }
 
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 473fe94..23b2d29 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -866,23 +866,20 @@ _mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
                                             (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_shuffle_epi8(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                          (__v64qi)_mm512_shuffle_epi8(__A, __B),
                                          (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                          (__v64qi)_mm512_shuffle_epi8(__A, __B),
                                          (__v64qi)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h
index 8899298..b161440 100644
--- a/clang/lib/Headers/avx512cdintrin.h
+++ b/clang/lib/Headers/avx512cdintrin.h
@@ -109,17 +109,14 @@ _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcastmb_epi64 (__mmask8 __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_broadcastmb_epi64(__mmask8 __A) {
   return (__m512i) _mm512_set1_epi64((long long) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcastmw_epi32 (__mmask16 __A)
-{
-  return (__m512i) _mm512_set1_epi32((int) __A);
-
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_broadcastmw_epi32(__mmask16 __A) {
+  return (__m512i)_mm512_set1_epi32((int)__A);
 }
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 81e4cbb9..639fb60 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -1067,33 +1067,29 @@ _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                             (__v16qi)_mm_shuffle_epi8(__A, __B),
                                             (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                             (__v16qi)_mm_shuffle_epi8(__A, __B),
                                             (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                          (__v32qi)_mm256_shuffle_epi8(__A, __B),
                                          (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                          (__v32qi)_mm256_shuffle_epi8(__A, __B),
                                          (__v32qi)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h
index 30c9f90..cb98e7c 100644
--- a/clang/lib/Headers/avx512vlcdintrin.h
+++ b/clang/lib/Headers/avx512vlcdintrin.h
@@ -29,31 +29,26 @@
 #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
 #endif
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastmb_epi64 (__mmask8 __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastmb_epi64(__mmask8 __A) {
   return (__m128i) _mm_set1_epi64x((long long) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastmb_epi64 (__mmask8 __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastmb_epi64(__mmask8 __A) {
   return (__m256i) _mm256_set1_epi64x((long long)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastmw_epi32 (__mmask16 __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastmw_epi32(__mmask16 __A) {
   return (__m128i) _mm_set1_epi32((int)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastmw_epi32 (__mmask16 __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastmw_epi32(__mmask16 __A) {
   return (__m256i) _mm256_set1_epi32((int)__A);
 }
 
-
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_conflict_epi64 (__m128i __A)
 {
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 9d007c8..ee96caa 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -590,10 +590,9 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b)
 ///    Bits [6:4] Reserved.  \n
 ///    Bits [3:0] select the source byte to be copied.
 /// \returns A 128-bit integer vector containing the copied or cleared values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_shuffle_epi8(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_shuffle_epi8(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Copies the 8-bit integers from a 64-bit integer vector to the
@@ -615,13 +614,12 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
 ///    destination. \n
 ///    Bits [2:0] select the source byte to be copied.
 /// \returns A 64-bit integer vector containing the copied or cleared values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_shuffle_pi8(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_pshufb128(
-        (__v16qi)__builtin_shufflevector(
-            (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1),
-        (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_shuffle_pi8(__m64 __a, __m64 __b) {
+  return __trunc64(__builtin_ia32_pshufb128(
+      (__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){},
+                                       0, 1, 0, 1),
+      (__v16qi)__zext128(__b)));
 }
 
 /// For each 8-bit integer in the first source operand, perform one of
diff --git a/clang/lib/Index/IndexTypeSourceInfo.cpp b/clang/lib/Index/IndexTypeSourceInfo.cpp
index 74c6c11..3c1e038 100644
--- a/clang/lib/Index/IndexTypeSourceInfo.cpp
+++ b/clang/lib/Index/IndexTypeSourceInfo.cpp
@@ -117,7 +117,7 @@ public:
   }
 
   bool VisitTagTypeLoc(TagTypeLoc TL) {
-    TagDecl *D = TL.getOriginalDecl();
+    TagDecl *D = TL.getDecl();
     if (!IndexCtx.shouldIndexFunctionLocalSymbols() &&
         D->getParentFunctionOrMethod())
       return true;
diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp
index c78d66f..08835ea 100644
--- a/clang/lib/Index/USRGeneration.cpp
+++ b/clang/lib/Index/USRGeneration.cpp
@@ -911,11 +911,10 @@ void USRGenerator::VisitType(QualType T) {
     }
     if (const TagType *TT = T->getAs<TagType>()) {
       if (const auto *ICNT = dyn_cast<InjectedClassNameType>(TT)) {
-        T = ICNT->getOriginalDecl()->getCanonicalTemplateSpecializationType(
-            Ctx);
+        T = ICNT->getDecl()->getCanonicalTemplateSpecializationType(Ctx);
       } else {
         Out << '$';
-        VisitTagDecl(TT->getOriginalDecl());
+        VisitTagDecl(TT->getDecl());
         return;
       }
     }
diff --git a/clang/lib/InstallAPI/HeaderFile.cpp b/clang/lib/InstallAPI/HeaderFile.cpp
index 0b7041e..d736a0a 100644
--- a/clang/lib/InstallAPI/HeaderFile.cpp
+++ b/clang/lib/InstallAPI/HeaderFile.cpp
@@ -38,7 +38,7 @@ std::optional<std::string> createIncludeHeaderName(const StringRef FullPath) {
 
 bool isHeaderFile(StringRef Path) {
   return StringSwitch<bool>(sys::path::extension(Path))
-      .Cases(".h", ".H", ".hh", ".hpp", ".hxx", true)
+      .Cases({".h", ".H", ".hh", ".hpp", ".hxx"}, true)
       .Default(false);
 }
 
diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp
index f12e040..53fbc36 100644
--- a/clang/lib/InstallAPI/Visitor.cpp
+++ b/clang/lib/InstallAPI/Visitor.cpp
@@ -543,8 +543,8 @@ void InstallAPIVisitor::emitVTableSymbols(const CXXRecordDecl *D,
   }
 
   for (const auto &It : D->bases()) {
-    const CXXRecordDecl *Base = cast<CXXRecordDecl>(
-        It.getType()->castAs<RecordType>()->getOriginalDecl());
+    const auto *Base =
+        cast<CXXRecordDecl>(It.getType()->castAs<RecordType>()->getDecl());
     const auto BaseAccess = getAccessForDecl(Base);
     if (!BaseAccess)
       continue;
diff --git a/clang/lib/Interpreter/InterpreterValuePrinter.cpp b/clang/lib/Interpreter/InterpreterValuePrinter.cpp
index a55b7f5..0ed02f3 100644
--- a/clang/lib/Interpreter/InterpreterValuePrinter.cpp
+++ b/clang/lib/Interpreter/InterpreterValuePrinter.cpp
@@ -66,10 +66,10 @@ static std::string QualTypeToString(ASTContext &Ctx, QualType QT) {
   const QualType NonRefTy = QT.getNonReferenceType();
 
   if (const auto *TTy = llvm::dyn_cast<TagType>(NonRefTy))
-    return DeclTypeToString(NonRefTy, TTy->getOriginalDecl());
+    return DeclTypeToString(NonRefTy, TTy->getDecl());
 
   if (const auto *TRy = dyn_cast<RecordType>(NonRefTy))
-    return DeclTypeToString(NonRefTy, TRy->getOriginalDecl());
+    return DeclTypeToString(NonRefTy, TRy->getDecl());
 
   const QualType Canon = NonRefTy.getCanonicalType();
 
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 5c6ecdb..6a5e5d4 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -248,50 +248,67 @@ static bool warnByDefaultOnWrongCase(StringRef Include) {
 
   // The standard C/C++ and Posix headers
   return llvm::StringSwitch<bool>(LowerInclude)
-    // C library headers
-    .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
-    .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
-    .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
-    .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stdcountof.h", true)
-    .Cases("stddef.h", "stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
-    .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
-    .Cases("wchar.h", "wctype.h", true)
-
-    // C++ headers for C library facilities
-    .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
-    .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
-    .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
-    .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
-    .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
-    .Case("cwctype", true)
-
-    // C++ library headers
-    .Cases("algorithm", "fstream", "list", "regex", "thread", true)
-    .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
-    .Cases("atomic", "future", "map", "set", "type_traits", true)
-    .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
-    .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
-    .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
-    .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
-    .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
-    .Cases("deque", "istream", "queue", "string", "valarray", true)
-    .Cases("exception", "iterator", "random", "strstream", "vector", true)
-    .Cases("forward_list", "limits", "ratio", "system_error", true)
-
-    // POSIX headers (which aren't also C headers)
-    .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
-    .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
-    .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
-    .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
-    .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
-    .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
-    .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
-    .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
-    .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
-    .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
-    .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
-    .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
-    .Default(false);
+      // C library headers
+      .Cases({"assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h"}, true)
+      .Cases({"float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h"},
+             true)
+      .Cases({"math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h"}, true)
+      .Cases({"stdatomic.h", "stdbool.h", "stdckdint.h", "stdcountof.h"}, true)
+      .Cases({"stddef.h", "stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h"},
+             true)
+      .Cases({"string.h", "tgmath.h", "threads.h", "time.h", "uchar.h"}, true)
+      .Cases({"wchar.h", "wctype.h"}, true)
+
+      // C++ headers for C library facilities
+      .Cases({"cassert", "ccomplex", "cctype", "cerrno", "cfenv"}, true)
+      .Cases({"cfloat", "cinttypes", "ciso646", "climits", "clocale"}, true)
+      .Cases({"cmath", "csetjmp", "csignal", "cstdalign", "cstdarg"}, true)
+      .Cases({"cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib"}, true)
+      .Cases({"cstring", "ctgmath", "ctime", "cuchar", "cwchar"}, true)
+      .Case("cwctype", true)
+
+      // C++ library headers
+      .Cases({"algorithm", "fstream", "list", "regex", "thread"}, true)
+      .Cases({"array", "functional", "locale", "scoped_allocator", "tuple"},
+             true)
+      .Cases({"atomic", "future", "map", "set", "type_traits"}, true)
+      .Cases(
+          {"bitset", "initializer_list", "memory", "shared_mutex", "typeindex"},
+          true)
+      .Cases({"chrono", "iomanip", "mutex", "sstream", "typeinfo"}, true)
+      .Cases({"codecvt", "ios", "new", "stack", "unordered_map"}, true)
+      .Cases({"complex", "iosfwd", "numeric", "stdexcept", "unordered_set"},
+             true)
+      .Cases(
+          {"condition_variable", "iostream", "ostream", "streambuf", "utility"},
+          true)
+      .Cases({"deque", "istream", "queue", "string", "valarray"}, true)
+      .Cases({"exception", "iterator", "random", "strstream", "vector"}, true)
+      .Cases({"forward_list", "limits", "ratio", "system_error"}, true)
+
+      // POSIX headers (which aren't also C headers)
+      .Cases({"aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h"}, true)
+      .Cases({"fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h"}, true)
+      .Cases({"grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h"}, true)
+      .Cases({"mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h"},
+             true)
+      .Cases({"netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h"},
+             true)
+      .Cases({"regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h"}, true)
+      .Cases({"strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h"},
+             true)
+      .Cases({"sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h",
+              "sys/socket.h"},
+             true)
+      .Cases({"sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h",
+              "sys/types.h"},
+             true)
+      .Cases(
+          {"sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h"},
+          true)
+      .Cases({"tar.h", "termios.h", "trace.h", "ulimit.h"}, true)
+      .Cases({"unistd.h", "utime.h", "utmpx.h", "wordexp.h"}, true)
+      .Default(false);
 }
 
 /// Find a similar string in `Candidates`.
@@ -3648,14 +3665,14 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
           std::pair<tok::TokenKind, SourceLocation> Matches) {
         Diag(CurTok, diag::err_expected) << Expected;
         Diag(Matches.second, diag::note_matching) << Matches.first;
-        if (CurTok.isNot(tok::eod))
+        if (CurTok.isNot(EndTokenKind))
           DiscardUntilEndOfDirective(CurTok);
       };
 
   auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
     if (CurTok.isNot(Kind)) {
       Diag(CurTok, diag::err_expected) << Kind;
-      if (CurTok.isNot(tok::eod))
+      if (CurTok.isNot(EndTokenKind))
         DiscardUntilEndOfDirective(CurTok);
       return false;
     }
@@ -3746,7 +3763,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
       if (Result.isNegative()) {
         Diag(CurTok, diag::err_requires_positive_value)
             << toString(Result, 10) << /*positive*/ 0;
-        if (CurTok.isNot(tok::eod))
+        if (CurTok.isNot(EndTokenKind))
           DiscardUntilEndOfDirective(CurTok);
         return std::nullopt;
       }
@@ -3889,7 +3906,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
       }
       if (!ForHasEmbed) {
         Diag(ParamStartLoc, diag::err_pp_unknown_parameter) << 1 << Parameter;
-        if (CurTok.isNot(tok::eod))
+        if (CurTok.isNot(EndTokenKind))
           DiscardUntilEndOfDirective(CurTok);
         return std::nullopt;
       }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index dec1956..dd80ae5 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1262,16 +1262,11 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
 
   std::optional<LexEmbedParametersResult> Params =
       this->LexEmbedParameters(Tok, /*ForHasEmbed=*/true);
-  assert((Params || Tok.is(tok::eod)) &&
-         "expected success or to be at the end of the directive");
 
   if (!Params)
     return EmbedResult::Invalid;
 
-  if (Params->UnrecognizedParams > 0)
-    return EmbedResult::NotFound;
-
-  if (!Tok.is(tok::r_paren)) {
+  if (Tok.isNot(tok::r_paren)) {
     Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after)
         << II << tok::r_paren;
     Diag(LParenLoc, diag::note_matching) << tok::l_paren;
@@ -1280,6 +1275,9 @@ EmbedResult Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
     return EmbedResult::Invalid;
   }
 
+  if (Params->UnrecognizedParams > 0)
+    return EmbedResult::NotFound;
+
   SmallString<128> FilenameBuffer;
   StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer);
   if (Filename.empty())
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index bbff627..ec01faf 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -1272,7 +1272,7 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
   // tokens and store them for late parsing at the end of the translation unit.
   if (getLangOpts().DelayedTemplateParsing && Tok.isNot(tok::equal) &&
       TemplateInfo.Kind == ParsedTemplateKind::Template &&
-      Actions.canDelayFunctionBody(D)) {
+      LateParsedAttrs->empty() && Actions.canDelayFunctionBody(D)) {
     MultiTemplateParamsArg TemplateParameterLists(*TemplateInfo.TemplateParams);
 
     ParseScope BodyScope(this, Scope::FnScope | Scope::DeclScope |
@@ -1301,10 +1301,8 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
     }
     return DP;
   }
-  else if (CurParsedObjCImpl &&
-           !TemplateInfo.TemplateParams &&
-           (Tok.is(tok::l_brace) || Tok.is(tok::kw_try) ||
-            Tok.is(tok::colon)) &&
+  if (CurParsedObjCImpl && !TemplateInfo.TemplateParams &&
+      (Tok.is(tok::l_brace) || Tok.is(tok::kw_try) || Tok.is(tok::colon)) &&
       Actions.CurContext->isTranslationUnit()) {
     ParseScope BodyScope(this, Scope::FnScope | Scope::DeclScope |
                                    Scope::CompoundStmtScope);
@@ -1420,7 +1418,8 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
 
   // Late attributes are parsed in the same scope as the function body.
   if (LateParsedAttrs)
-    ParseLexedAttributeList(*LateParsedAttrs, Res, false, true);
+    ParseLexedAttributeList(*LateParsedAttrs, Res, /*EnterScope=*/false,
+                            /*OnDefinition=*/true);
 
   if (SkipFunctionBodies && (!Res || Actions.canSkipFunctionBody(Res)) &&
       trySkippingFunctionBody()) {
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
index db14349..e797400 100644
--- a/clang/lib/Sema/CheckExprLifetime.cpp
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -361,11 +361,11 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
     if (!Callee->getIdentifier())
       return false;
     return llvm::StringSwitch<bool>(Callee->getName())
-        .Cases("begin", "rbegin", "cbegin", "crbegin", true)
-        .Cases("end", "rend", "cend", "crend", true)
-        .Cases("c_str", "data", "get", true)
+        .Cases({"begin", "rbegin", "cbegin", "crbegin"}, true)
+        .Cases({"end", "rend", "cend", "crend"}, true)
+        .Cases({"c_str", "data", "get"}, true)
         // Map and set types.
-        .Cases("find", "equal_range", "lower_bound", "upper_bound", true)
+        .Cases({"find", "equal_range", "lower_bound", "upper_bound"}, true)
         .Default(false);
   }
   if (Callee->getReturnType()->isReferenceType()) {
@@ -377,7 +377,7 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
              OO == OverloadedOperatorKind::OO_Star;
     }
     return llvm::StringSwitch<bool>(Callee->getName())
-        .Cases("front", "back", "at", "top", "value", true)
+        .Cases({"front", "back", "at", "top", "value"}, true)
         .Default(false);
   }
   return false;
@@ -394,14 +394,14 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
   if (FD->getReturnType()->isPointerType() ||
       isRecordWithAttr<PointerAttr>(FD->getReturnType())) {
     return llvm::StringSwitch<bool>(FD->getName())
-        .Cases("begin", "rbegin", "cbegin", "crbegin", true)
-        .Cases("end", "rend", "cend", "crend", true)
+        .Cases({"begin", "rbegin", "cbegin", "crbegin"}, true)
+        .Cases({"end", "rend", "cend", "crend"}, true)
         .Case("data", true)
         .Default(false);
   }
   if (FD->getReturnType()->isReferenceType()) {
     return llvm::StringSwitch<bool>(FD->getName())
-        .Cases("get", "any_cast", true)
+        .Cases({"get", "any_cast"}, true)
         .Default(false);
   }
   return false;
diff --git a/clang/lib/Sema/SemaAvailability.cpp b/clang/lib/Sema/SemaAvailability.cpp
index f8d61d9..b09e168 100644
--- a/clang/lib/Sema/SemaAvailability.cpp
+++ b/clang/lib/Sema/SemaAvailability.cpp
@@ -102,7 +102,7 @@ Sema::ShouldDiagnoseAvailabilityOfDecl(const NamedDecl *D, std::string *Message,
       break;
     for (const Type *T = TD->getUnderlyingType().getTypePtr(); /**/; /**/) {
       if (auto *TT = dyn_cast<TagType>(T)) {
-        D = TT->getOriginalDecl()->getDefinitionOrSelf();
+        D = TT->getDecl()->getDefinitionOrSelf();
       } else if (isa<SubstTemplateTypeParmType>(T)) {
         // A Subst* node represents a use through a template.
         // Any uses of the underlying declaration happened through it's template
@@ -1019,7 +1019,7 @@ bool DiagnoseUnguardedAvailability::VisitTypeLoc(TypeLoc Ty) {
     return true;
 
   if (const auto *TT = dyn_cast<TagType>(TyPtr)) {
-    TagDecl *TD = TT->getOriginalDecl()->getDefinitionOrSelf();
+    TagDecl *TD = TT->getDecl()->getDefinitionOrSelf();
     DiagnoseDeclAvailability(TD, Range);
 
   } else if (const auto *TD = dyn_cast<TypedefType>(TyPtr)) {
diff --git a/clang/lib/Sema/SemaBPF.cpp b/clang/lib/Sema/SemaBPF.cpp
index be890ab..a9761764 100644
--- a/clang/lib/Sema/SemaBPF.cpp
+++ b/clang/lib/Sema/SemaBPF.cpp
@@ -57,7 +57,7 @@ static bool isValidPreserveTypeInfoArg(Expr *Arg) {
 
   // Record type or Enum type.
   if (const auto *RT = ArgType->getAsCanonical<TagType>())
-    if (!RT->getOriginalDecl()->getDeclName().isEmpty())
+    if (!RT->getDecl()->getDeclName().isEmpty())
       return true;
 
   return false;
diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp
index 97ba1a51..c52fc5b 100644
--- a/clang/lib/Sema/SemaCXXScopeSpec.cpp
+++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp
@@ -31,8 +31,7 @@ static CXXRecordDecl *getCurrentInstantiationOf(QualType T,
   const TagType *TagTy = dyn_cast<TagType>(T->getCanonicalTypeInternal());
   if (!isa_and_present<RecordType, InjectedClassNameType>(TagTy))
     return nullptr;
-  auto *RD =
-      cast<CXXRecordDecl>(TagTy->getOriginalDecl())->getDefinitionOrSelf();
+  auto *RD = cast<CXXRecordDecl>(TagTy->getDecl())->getDefinitionOrSelf();
   if (isa<InjectedClassNameType>(TagTy) ||
       RD->isCurrentInstantiation(CurContext))
     return RD;
@@ -121,7 +120,7 @@ DeclContext *Sema::computeDeclContext(const CXXScopeSpec &SS,
         }
       } else if (const auto *RecordT = dyn_cast<RecordType>(NNSType)) {
         // The nested name specifier refers to a member of a class template.
-        return RecordT->getOriginalDecl()->getDefinitionOrSelf();
+        return RecordT->getDecl()->getDefinitionOrSelf();
       }
     }
 
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index d986e3b..ddf17d8 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -962,7 +962,7 @@ void CastOperation::CheckDynamicCast() {
   }
 
   // C++ 5.2.7p6: Otherwise, v shall be [polymorphic].
-  const RecordDecl *SrcDecl = SrcRecord->getOriginalDecl()->getDefinition();
+  const RecordDecl *SrcDecl = SrcRecord->getDecl()->getDefinition();
   assert(SrcDecl && "Definition missing");
   if (!cast<CXXRecordDecl>(SrcDecl)->isPolymorphic()) {
     Self.Diag(OpRange.getBegin(), diag::err_bad_dynamic_cast_not_polymorphic)
@@ -1453,7 +1453,7 @@ static TryCastResult TryStaticCast(Sema &Self, ExprResult &SrcExpr,
   // converted to an integral type. [...] A value of a scoped enumeration type
   // can also be explicitly converted to a floating-point type [...].
   if (const EnumType *Enum = dyn_cast<EnumType>(SrcType)) {
-    if (Enum->getOriginalDecl()->isScoped()) {
+    if (Enum->getDecl()->isScoped()) {
       if (DestType->isBooleanType()) {
         Kind = CK_IntegralToBoolean;
         return TC_Success;
@@ -3105,7 +3105,7 @@ void CastOperation::CheckCStyleCast() {
       }
 
       // GCC's cast to union extension.
-      if (RecordDecl *RD = DestRecordTy->getOriginalDecl(); RD->isUnion()) {
+      if (RecordDecl *RD = DestRecordTy->getDecl(); RD->isUnion()) {
         if (CastExpr::getTargetFieldForToUnionCast(RD->getDefinitionOrSelf(),
                                                    SrcType)) {
           Self.Diag(OpRange.getBegin(), diag::ext_typecheck_cast_to_union)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 063db05..4f409ca 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3580,9 +3580,8 @@ static bool CheckNonNullExpr(Sema &S, const Expr *Expr) {
   // As a special case, transparent unions initialized with zero are
   // considered null for the purposes of the nonnull attribute.
   if (const RecordType *UT = Expr->getType()->getAsUnionType();
-      UT && UT->getOriginalDecl()
-                ->getMostRecentDecl()
-                ->hasAttr<TransparentUnionAttr>()) {
+      UT &&
+      UT->getDecl()->getMostRecentDecl()->hasAttr<TransparentUnionAttr>()) {
     if (const auto *CLE = dyn_cast<CompoundLiteralExpr>(Expr))
       if (const auto *ILE = dyn_cast<InitListExpr>(CLE->getInitializer()))
         Expr = ILE->getInit(0);
@@ -12879,8 +12878,8 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
 
   if (const EnumType *SourceEnum = Source->getAsCanonical<EnumType>())
     if (const EnumType *TargetEnum = Target->getAsCanonical<EnumType>())
-      if (SourceEnum->getOriginalDecl()->hasNameForLinkage() &&
-          TargetEnum->getOriginalDecl()->hasNameForLinkage() &&
+      if (SourceEnum->getDecl()->hasNameForLinkage() &&
+          TargetEnum->getDecl()->hasNameForLinkage() &&
           SourceEnum != TargetEnum) {
         if (SourceMgr.isInSystemMacro(CC))
           return;
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 5dd4949..0514d10 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -2070,7 +2070,7 @@ static const char *GetCompletionTypeString(QualType T, ASTContext &Context,
 
     // Anonymous tag types are constant strings.
     if (const TagType *TagT = dyn_cast<TagType>(T))
-      if (TagDecl *Tag = TagT->getOriginalDecl())
+      if (TagDecl *Tag = TagT->getDecl())
         if (!Tag->hasNameForLinkage()) {
           switch (Tag->getTagKind()) {
           case TagTypeKind::Struct:
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 229e91e..c0aba83 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -640,10 +640,9 @@ static void checkNoThrow(Sema &S, const Stmt *E,
         QualType::DestructionKind::DK_cxx_destructor) {
       const auto *T =
           cast<RecordType>(ReturnType.getCanonicalType().getTypePtr());
-      checkDeclNoexcept(cast<CXXRecordDecl>(T->getOriginalDecl())
-                            ->getDefinition()
-                            ->getDestructor(),
-                        /*IsDtor=*/true);
+      checkDeclNoexcept(
+          cast<CXXRecordDecl>(T->getDecl())->getDefinition()->getDestructor(),
+          /*IsDtor=*/true);
     }
   } else
     for (const auto *Child : E->children()) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8ac09c4..04d46d6 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -13816,13 +13816,20 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
       VDecl->setInvalidDecl();
   }
 
-  // C++ [module.import/6] external definitions are not permitted in header
-  // units.
+  // C++ [module.import/6]
+  //   ...
+  //   A header unit shall not contain a definition of a non-inline function or
+  //   variable whose name has external linkage.
+  //
+  // We choose to allow weak & selectany definitions, as they are common in
+  // headers, and have semantics similar to inline definitions which are allowed
+  // in header units.
   if (getLangOpts().CPlusPlusModules && currentModuleIsHeaderUnit() &&
       !VDecl->isInvalidDecl() && VDecl->isThisDeclarationADefinition() &&
       VDecl->getFormalLinkage() == Linkage::External && !VDecl->isInline() &&
       !VDecl->isTemplated() && !isa<VarTemplateSpecializationDecl>(VDecl) &&
-      !VDecl->getInstantiatedFromStaticDataMember()) {
+      !VDecl->getInstantiatedFromStaticDataMember() &&
+      !(VDecl->hasAttr<SelectAnyAttr>() || VDecl->hasAttr<WeakAttr>())) {
     Diag(VDecl->getLocation(), diag::err_extern_def_in_header_unit);
     VDecl->setInvalidDecl();
   }
@@ -16153,16 +16160,24 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
     }
   }
 
-  // C++ [module.import/6] external definitions are not permitted in header
-  // units.  Deleted and Defaulted functions are implicitly inline (but the
+  // C++ [module.import/6]
+  //   ...
+  //   A header unit shall not contain a definition of a non-inline function or
+  //   variable whose name has external linkage.
+  //
+  // Deleted and Defaulted functions are implicitly inline (but the
   // inline state is not set at this point, so check the BodyKind explicitly).
+  // We choose to allow weak & selectany definitions, as they are common in
+  // headers, and have semantics similar to inline definitions which are allowed
+  // in header units.
   // FIXME: Consider an alternate location for the test where the inlined()
   // state is complete.
   if (getLangOpts().CPlusPlusModules && currentModuleIsHeaderUnit() &&
       !FD->isInvalidDecl() && !FD->isInlined() &&
       BodyKind != FnBodyKind::Delete && BodyKind != FnBodyKind::Default &&
       FD->getFormalLinkage() == Linkage::External && !FD->isTemplated() &&
-      !FD->isTemplateInstantiation()) {
+      !FD->isTemplateInstantiation() &&
+      !(FD->hasAttr<SelectAnyAttr>() || FD->hasAttr<WeakAttr>())) {
     assert(FD->isThisDeclarationADefinition());
     Diag(FD->getLocation(), diag::err_extern_def_in_header_unit);
     FD->setInvalidDecl();
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 3107876..e6f8748 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -1255,7 +1255,7 @@ bool Sema::isValidPointerAttrType(QualType T, bool RefOkay) {
   // The nonnull attribute, and other similar attributes, can be applied to a
   // transparent union that contains a pointer type.
   if (const RecordType *UT = T->getAsUnionType()) {
-    RecordDecl *UD = UT->getOriginalDecl()->getDefinitionOrSelf();
+    RecordDecl *UD = UT->getDecl()->getDefinitionOrSelf();
     if (UD->hasAttr<TransparentUnionAttr>()) {
       for (const auto *I : UD->fields()) {
         QualType QT = I->getType();
@@ -3629,18 +3629,20 @@ static FormatAttrKind getFormatAttrKind(StringRef Format) {
       // Check for formats that get handled specially.
       .Case("NSString", NSStringFormat)
       .Case("CFString", CFStringFormat)
-      .Cases("gnu_strftime", "strftime", StrftimeFormat)
+      .Cases({"gnu_strftime", "strftime"}, StrftimeFormat)
 
       // Otherwise, check for supported formats.
-      .Cases("gnu_scanf", "scanf", "gnu_printf", "printf", "printf0",
-             "gnu_strfmon", "strfmon", SupportedFormat)
-      .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat)
-      .Cases("kprintf", "syslog", SupportedFormat) // OpenBSD.
-      .Case("freebsd_kprintf", SupportedFormat)    // FreeBSD.
+      .Cases({"gnu_scanf", "scanf", "gnu_printf", "printf", "printf0",
+              "gnu_strfmon", "strfmon"},
+             SupportedFormat)
+      .Cases({"cmn_err", "vcmn_err", "zcmn_err"}, SupportedFormat)
+      .Cases({"kprintf", "syslog"}, SupportedFormat) // OpenBSD.
+      .Case("freebsd_kprintf", SupportedFormat)      // FreeBSD.
       .Case("os_trace", SupportedFormat)
       .Case("os_log", SupportedFormat)
 
-      .Cases("gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag", IgnoredFormat)
+      .Cases({"gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag"},
+             IgnoredFormat)
       .Default(InvalidFormat);
 }
 
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 215431c..d41ab12 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -5630,7 +5630,7 @@ bool Sema::SetCtorInitializers(CXXConstructorDecl *Constructor, bool AnyErrors,
 
 static void PopulateKeysForFields(FieldDecl *Field, SmallVectorImpl<const void*> &IdealInits) {
   if (const RecordType *RT = Field->getType()->getAsCanonical<RecordType>()) {
-    const RecordDecl *RD = RT->getOriginalDecl();
+    const RecordDecl *RD = RT->getDecl();
     if (RD->isAnonymousStructOrUnion()) {
       for (auto *Field : RD->getDefinitionOrSelf()->fields())
         PopulateKeysForFields(Field, IdealInits);
@@ -7630,9 +7630,8 @@ static bool defaultedSpecialMemberIsConstexpr(
         continue;
       QualType BaseType = S.Context.getBaseElementType(F->getType());
       if (const RecordType *RecordTy = BaseType->getAsCanonical<RecordType>()) {
-        CXXRecordDecl *FieldRecDecl =
-            cast<CXXRecordDecl>(RecordTy->getOriginalDecl())
-                ->getDefinitionOrSelf();
+        auto *FieldRecDecl =
+            cast<CXXRecordDecl>(RecordTy->getDecl())->getDefinitionOrSelf();
         if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM,
                                       BaseType.getCVRQualifiers(),
                                       ConstArg && !F->isMutable()))
@@ -10645,7 +10644,7 @@ void Sema::checkIllFormedTrivialABIStruct(CXXRecordDecl &RD) {
     if (const auto *RT =
             FT->getBaseElementTypeUnsafe()->getAsCanonical<RecordType>())
       if (!RT->isDependentType() &&
-          !cast<CXXRecordDecl>(RT->getOriginalDecl()->getDefinitionOrSelf())
+          !cast<CXXRecordDecl>(RT->getDecl()->getDefinitionOrSelf())
                ->canPassInRegisters()) {
         PrintDiagAndRemoveAttr(5);
         return;
diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp
index 98eb5af..3df9f9c 100644
--- a/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/clang/lib/Sema/SemaDeclObjC.cpp
@@ -3232,10 +3232,8 @@ static bool tryMatchRecordTypes(ASTContext &Context,
   assert(lt && rt && lt != rt);
 
   if (!isa<RecordType>(lt) || !isa<RecordType>(rt)) return false;
-  RecordDecl *left =
-      cast<RecordType>(lt)->getOriginalDecl()->getDefinitionOrSelf();
-  RecordDecl *right =
-      cast<RecordType>(rt)->getOriginalDecl()->getDefinitionOrSelf();
+  RecordDecl *left = cast<RecordType>(lt)->getDecl()->getDefinitionOrSelf();
+  RecordDecl *right = cast<RecordType>(rt)->getDecl()->getDefinitionOrSelf();
 
   // Require union-hood to match.
   if (left->isUnion() != right->isUnion()) return false;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 01abc1f..3e0e9bb 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1535,12 +1535,8 @@ void Sema::checkEnumArithmeticConversions(Expr *LHS, Expr *RHS,
     // are ill-formed.
     if (getLangOpts().CPlusPlus26)
       DiagID = diag::warn_conv_mixed_enum_types_cxx26;
-    else if (!L->castAsCanonical<EnumType>()
-                  ->getOriginalDecl()
-                  ->hasNameForLinkage() ||
-             !R->castAsCanonical<EnumType>()
-                  ->getOriginalDecl()
-                  ->hasNameForLinkage()) {
+    else if (!L->castAsCanonical<EnumType>()->getDecl()->hasNameForLinkage() ||
+             !R->castAsCanonical<EnumType>()->getDecl()->hasNameForLinkage()) {
       // If either enumeration type is unnamed, it's less likely that the
       // user cares about this, but this situation is still deprecated in
       // C++2a. Use a different warning group.
@@ -7095,7 +7091,7 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
     for (unsigned i = 0, e = Args.size(); i != e; i++) {
       if (const auto *RT =
               dyn_cast<RecordType>(Args[i]->getType().getCanonicalType())) {
-        if (RT->getOriginalDecl()->isOrContainsUnion())
+        if (RT->getDecl()->isOrContainsUnion())
           Diag(Args[i]->getBeginLoc(), diag::warn_cmse_nonsecure_union)
               << 0 << i;
       }
@@ -9748,7 +9744,7 @@ Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType,
   if (!UT)
     return AssignConvertType::Incompatible;
 
-  RecordDecl *UD = UT->getOriginalDecl()->getDefinitionOrSelf();
+  RecordDecl *UD = UT->getDecl()->getDefinitionOrSelf();
   if (!UD->hasAttr<TransparentUnionAttr>())
     return AssignConvertType::Incompatible;
 
@@ -10844,7 +10840,7 @@ static void diagnoseScopedEnums(Sema &S, const SourceLocation Loc,
   auto DiagnosticHelper = [&S](const Expr *expr, const QualType type) {
     SourceLocation BeginLoc = expr->getBeginLoc();
     QualType IntType = type->castAs<EnumType>()
-                           ->getOriginalDecl()
+                           ->getDecl()
                            ->getDefinitionOrSelf()
                            ->getIntegerType();
     std::string InsertionString = "static_cast<" + IntType.getAsString() + ">(";
@@ -11533,7 +11529,7 @@ QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
 
 static bool isScopedEnumerationType(QualType T) {
   if (const EnumType *ET = T->getAsCanonical<EnumType>())
-    return ET->getOriginalDecl()->isScoped();
+    return ET->getDecl()->isScoped();
   return false;
 }
 
@@ -13839,7 +13835,7 @@ static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD,
   while (RecordTypeList.size() > NextToCheckIndex) {
     bool IsNested = NextToCheckIndex > 0;
     for (const FieldDecl *Field : RecordTypeList[NextToCheckIndex]
-                                      ->getOriginalDecl()
+                                      ->getDecl()
                                       ->getDefinitionOrSelf()
                                       ->fields()) {
       // First, check every field for constness.
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 0fe242dce..fe1f89b 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1988,7 +1988,7 @@ static bool doesUsualArrayDeleteWantSize(Sema &S, SourceLocation loc,
   DeclarationName deleteName =
     S.Context.DeclarationNames.getCXXOperatorName(OO_Array_Delete);
   LookupResult ops(S, deleteName, loc, Sema::LookupOrdinaryName);
-  S.LookupQualifiedName(ops, record->getOriginalDecl()->getDefinitionOrSelf());
+  S.LookupQualifiedName(ops, record->getDecl()->getDefinitionOrSelf());
 
   // We're just doing this for information.
   ops.suppressDiagnostics();
@@ -6667,8 +6667,7 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) {
 
   // That should be enough to guarantee that this type is complete, if we're
   // not processing a decltype expression.
-  CXXRecordDecl *RD =
-      cast<CXXRecordDecl>(RT->getOriginalDecl())->getDefinitionOrSelf();
+  auto *RD = cast<CXXRecordDecl>(RT->getDecl())->getDefinitionOrSelf();
   if (RD->isInvalidDecl() || RD->isDependentContext())
     return E;
 
diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp
index 331f6e5..4daf0170 100644
--- a/clang/lib/Sema/SemaExprObjC.cpp
+++ b/clang/lib/Sema/SemaExprObjC.cpp
@@ -3846,8 +3846,7 @@ static inline T *getObjCBridgeAttr(const TypedefType *TD) {
   if (QT->isPointerType()) {
     QT = QT->getPointeeType();
     if (const RecordType *RT = QT->getAsCanonical<RecordType>()) {
-      for (auto *Redecl :
-           RT->getOriginalDecl()->getMostRecentDecl()->redecls()) {
+      for (auto *Redecl : RT->getDecl()->getMostRecentDecl()->redecls()) {
         if (auto *attr = Redecl->getAttr<T>())
           return attr;
       }
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 72b2ac9..f347066 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -782,7 +782,7 @@ bool SemaHLSL::isSemanticValid(FunctionDecl *FD, DeclaratorDecl *D) {
   if (!RT)
     return false;
 
-  const RecordDecl *RD = RT->getOriginalDecl();
+  const RecordDecl *RD = RT->getDecl();
   for (FieldDecl *Field : RD->fields()) {
     if (!isSemanticValid(FD, Field))
       return false;
@@ -1986,7 +1986,7 @@ SemaHLSL::TakeLocForHLSLAttribute(const HLSLAttributedResourceType *RT) {
 // requirements and adds them to Bindings
 void SemaHLSL::collectResourceBindingsOnUserRecordDecl(const VarDecl *VD,
                                                        const RecordType *RT) {
-  const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
   for (FieldDecl *FD : RD->fields()) {
     const Type *Ty = FD->getType()->getUnqualifiedDesugaredType();
 
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 543db46..f7974eb 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -775,7 +775,7 @@ void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
 
   if (Init >= NumInits || !ILE->getInit(Init)) {
     if (const RecordType *RType = ILE->getType()->getAsCanonical<RecordType>())
-      if (!RType->getOriginalDecl()->isUnion())
+      if (!RType->getDecl()->isUnion())
         assert((Init < NumInits || VerifyOnly) &&
                "This ILE should have been expanded");
 
@@ -9186,9 +9186,8 @@ bool InitializationSequence::Diagnose(Sema &S,
                    diag::note_member_declared_at);
 
             if (const auto *Record = Entity.getType()->getAs<RecordType>())
-              S.Diag(Record->getOriginalDecl()->getLocation(),
-                     diag::note_previous_decl)
-                  << S.Context.getCanonicalTagType(Record->getOriginalDecl());
+              S.Diag(Record->getDecl()->getLocation(), diag::note_previous_decl)
+                  << S.Context.getCanonicalTagType(Record->getDecl());
           }
           break;
         }
@@ -9974,8 +9973,8 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer(
         // Cases where template arguments in the RHS of the alias are not
         // dependent. e.g.
         //   using AliasFoo = Foo<bool>;
-        if (const auto *CTSD = llvm::dyn_cast<ClassTemplateSpecializationDecl>(
-                RT->getOriginalDecl()))
+        if (const auto *CTSD =
+                llvm::dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl()))
           Template = CTSD->getSpecializedTemplate();
       }
     }
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 25728de..5915d6e 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -2727,9 +2727,7 @@ bool Sema::LookupParsedName(LookupResult &R, Scope *S, CXXScopeSpec *SS,
     IsDependent = !DC && ObjectType->isDependentType();
     assert(((!DC && ObjectType->isDependentType()) ||
             !ObjectType->isIncompleteType() || !ObjectType->getAs<TagType>() ||
-            ObjectType->castAs<TagType>()
-                ->getOriginalDecl()
-                ->isEntityBeingDefined()) &&
+            ObjectType->castAs<TagType>()->getDecl()->isEntityBeingDefined()) &&
            "Caller should have completed object type");
   } else if (SS && SS->isNotEmpty()) {
     // This nested-name-specifier occurs after another nested-name-specifier,
@@ -3191,9 +3189,8 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) {
     //        namespaces of its associated classes.
     case Type::Record: {
       // FIXME: This should use the original decl.
-      CXXRecordDecl *Class =
-          cast<CXXRecordDecl>(cast<RecordType>(T)->getOriginalDecl())
-              ->getDefinitionOrSelf();
+      auto *Class = cast<CXXRecordDecl>(cast<RecordType>(T)->getDecl())
+                        ->getDefinitionOrSelf();
       addAssociatedClassesAndNamespaces(Result, Class);
       break;
     }
@@ -4606,7 +4603,7 @@ static void getNestedNameSpecifierIdentifiers(
       case Type::InjectedClassName: {
         auto *TT = cast<TagType>(T);
         getNestedNameSpecifierIdentifiers(TT->getQualifier(), Identifiers);
-        Identifiers.push_back(TT->getOriginalDecl()->getIdentifier());
+        Identifiers.push_back(TT->getDecl()->getIdentifier());
         return;
       }
       case Type::Typedef: {
diff --git a/clang/lib/Sema/SemaObjC.cpp b/clang/lib/Sema/SemaObjC.cpp
index 4f9470a..7aaa56e 100644
--- a/clang/lib/Sema/SemaObjC.cpp
+++ b/clang/lib/Sema/SemaObjC.cpp
@@ -1407,7 +1407,7 @@ SemaObjC::ObjCSubscriptKind SemaObjC::CheckSubscriptingKind(Expr *FromE) {
   int NoIntegrals = 0, NoObjCIdPointers = 0;
   SmallVector<CXXConversionDecl *, 4> ConversionDecls;
 
-  for (NamedDecl *D : cast<CXXRecordDecl>(RecordTy->getOriginalDecl())
+  for (NamedDecl *D : cast<CXXRecordDecl>(RecordTy->getDecl())
                           ->getDefinitionOrSelf()
                           ->getVisibleConversionFunctions()) {
     if (CXXConversionDecl *Conversion =
@@ -1511,7 +1511,7 @@ bool SemaObjC::isCFStringType(QualType T) {
   if (!RT)
     return false;
 
-  const RecordDecl *RD = RT->getOriginalDecl();
+  const RecordDecl *RD = RT->getDecl();
   if (RD->getTagKind() != TagTypeKind::Struct)
     return false;
 
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 8339bb1..7da09e8 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -2594,7 +2594,7 @@ IsTransparentUnionStandardConversion(Sema &S, Expr* From,
   if (!UT)
     return false;
   // The field to initialize within the transparent union.
-  const RecordDecl *UD = UT->getOriginalDecl()->getDefinitionOrSelf();
+  const RecordDecl *UD = UT->getDecl()->getDefinitionOrSelf();
   if (!UD->hasAttr<TransparentUnionAttr>())
     return false;
   // It's compatible if the expression matches any of the fields.
@@ -3973,7 +3973,7 @@ IsUserDefinedConversion(Sema &S, Expr *From, QualType ToType,
     if (!S.isCompleteType(From->getExprLoc(), ToType)) {
       // We're not going to find any constructors.
     } else if (auto *ToRecordDecl =
-                   dyn_cast<CXXRecordDecl>(ToRecordType->getOriginalDecl())) {
+                   dyn_cast<CXXRecordDecl>(ToRecordType->getDecl())) {
       ToRecordDecl = ToRecordDecl->getDefinitionOrSelf();
 
       Expr **Args = &From;
@@ -4048,7 +4048,7 @@ IsUserDefinedConversion(Sema &S, Expr *From, QualType ToType,
   } else if (const RecordType *FromRecordType =
                  From->getType()->getAsCanonical<RecordType>()) {
     if (auto *FromRecordDecl =
-            dyn_cast<CXXRecordDecl>(FromRecordType->getOriginalDecl())) {
+            dyn_cast<CXXRecordDecl>(FromRecordType->getDecl())) {
       FromRecordDecl = FromRecordDecl->getDefinitionOrSelf();
       // Add all of the conversion functions as candidates.
       const auto &Conversions = FromRecordDecl->getVisibleConversionFunctions();
@@ -6840,7 +6840,7 @@ ExprResult Sema::PerformContextualImplicitConversion(
   UnresolvedSet<4>
       ViableConversions; // These are *potentially* viable in C++1y.
   UnresolvedSet<4> ExplicitConversions;
-  const auto &Conversions = cast<CXXRecordDecl>(RecordTy->getOriginalDecl())
+  const auto &Conversions = cast<CXXRecordDecl>(RecordTy->getDecl())
                                 ->getDefinitionOrSelf()
                                 ->getVisibleConversionFunctions();
 
@@ -10194,9 +10194,7 @@ public:
 
       if (S.getLangOpts().CPlusPlus11) {
         for (QualType EnumTy : CandidateTypes[ArgIdx].enumeration_types()) {
-          if (!EnumTy->castAsCanonical<EnumType>()
-                   ->getOriginalDecl()
-                   ->isScoped())
+          if (!EnumTy->castAsCanonical<EnumType>()->getDecl()->isScoped())
             continue;
 
           if (!AddedTypes.insert(S.Context.getCanonicalType(EnumTy)).second)
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index b981c35..67f3856 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -221,8 +221,8 @@ static SourceLocation SourceLocationForUserDeclaredType(QualType QT) {
   SourceLocation Loc;
   const Type *T = QT->getUnqualifiedDesugaredType();
   if (const TagType *TT = dyn_cast<TagType>(T))
-    Loc = TT->getOriginalDecl()->getLocation();
-  else if (const ObjCInterfaceType *ObjCIT = dyn_cast<ObjCInterfaceType>(T))
+    Loc = TT->getDecl()->getLocation();
+  else if (const auto *ObjCIT = dyn_cast<ObjCInterfaceType>(T))
     Loc = ObjCIT->getDecl()->getLocation();
   return Loc;
 }
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index ae0bb616..f398963 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -1277,11 +1277,11 @@ static void checkEnumTypesInSwitchStmt(Sema &S, const Expr *Cond,
     return;
 
   // Ignore anonymous enums.
-  if (!CondEnumType->getOriginalDecl()->getIdentifier() &&
-      !CondEnumType->getOriginalDecl()->getTypedefNameForAnonDecl())
+  if (!CondEnumType->getDecl()->getIdentifier() &&
+      !CondEnumType->getDecl()->getTypedefNameForAnonDecl())
     return;
-  if (!CaseEnumType->getOriginalDecl()->getIdentifier() &&
-      !CaseEnumType->getOriginalDecl()->getTypedefNameForAnonDecl())
+  if (!CaseEnumType->getDecl()->getIdentifier() &&
+      !CaseEnumType->getDecl()->getTypedefNameForAnonDecl())
     return;
 
   if (S.Context.hasSameUnqualifiedType(CondType, CaseType))
@@ -3760,7 +3760,7 @@ private:
   Sema &S;
 };
 bool LocalTypedefNameReferencer::VisitRecordType(RecordType *RT) {
-  auto *R = dyn_cast<CXXRecordDecl>(RT->getOriginalDecl());
+  auto *R = dyn_cast<CXXRecordDecl>(RT->getDecl());
   if (!R || !R->isLocalClass() || !R->isLocalClass()->isExternallyVisible() ||
       R->isDependentType())
     return true;
@@ -3979,7 +3979,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,
             << RetValExp->getSourceRange();
     if (FD->hasAttr<CmseNSEntryAttr>() && RetValExp) {
       if (const auto *RT = dyn_cast<RecordType>(FnRetType.getCanonicalType())) {
-        if (RT->getOriginalDecl()->isOrContainsUnion())
+        if (RT->getDecl()->isOrContainsUnion())
           Diag(RetValExp->getBeginLoc(), diag::warn_cmse_nonsecure_union) << 1;
       }
     }
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index 0438af7..f957bdf 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -908,7 +908,7 @@ bool Sema::LookupInlineAsmField(StringRef Base, StringRef Member,
     LookupResult FieldResult(*this, &Context.Idents.get(NextMember),
                              SourceLocation(), LookupMemberName);
 
-    RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+    RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
     if (!LookupQualifiedName(FieldResult, RD))
       return true;
 
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 3a6ff99..2cc6593 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -408,9 +408,7 @@ bool Sema::LookupTemplateName(LookupResult &Found, Scope *S, CXXScopeSpec &SS,
     IsDependent = !LookupCtx && ObjectType->isDependentType();
     assert((IsDependent || !ObjectType->isIncompleteType() ||
             !ObjectType->getAs<TagType>() ||
-            ObjectType->castAs<TagType>()
-                ->getOriginalDecl()
-                ->isEntityBeingDefined()) &&
+            ObjectType->castAs<TagType>()->getDecl()->isEntityBeingDefined()) &&
            "Caller should have completed object type");
 
     // Template names cannot appear inside an Objective-C class or object type
@@ -1819,7 +1817,7 @@ public:
   }
 
   bool VisitTagType(const TagType *T) override {
-    return TraverseDecl(T->getOriginalDecl());
+    return TraverseDecl(T->getDecl());
   }
 
   bool TraverseDecl(const Decl *D) override {
@@ -2790,7 +2788,7 @@ struct DependencyChecker : DynamicRecursiveASTVisitor {
     // An InjectedClassNameType will never have a dependent template name,
     // so no need to traverse it.
     return TraverseTemplateArguments(
-        T->getTemplateArgs(T->getOriginalDecl()->getASTContext()));
+        T->getTemplateArgs(T->getDecl()->getASTContext()));
   }
 };
 } // end anonymous namespace
@@ -2914,7 +2912,7 @@ TemplateParameterList *Sema::MatchTemplateParametersToScopeSpecifier(
     if (const EnumType *EnumT = T->getAsCanonical<EnumType>()) {
       // FIXME: Forward-declared enums require a TSK_ExplicitSpecialization
       // check here.
-      EnumDecl *Enum = EnumT->getOriginalDecl();
+      EnumDecl *Enum = EnumT->getDecl();
 
       // Get to the parent type.
       if (TypeDecl *Parent = dyn_cast<TypeDecl>(Enum->getParent()))
@@ -3352,7 +3350,7 @@ static QualType builtinCommonTypeImpl(Sema &S, ElaboratedTypeKeyword Keyword,
 }
 
 static bool isInVkNamespace(const RecordType *RT) {
-  DeclContext *DC = RT->getOriginalDecl()->getDeclContext();
+  DeclContext *DC = RT->getDecl()->getDeclContext();
   if (!DC)
     return false;
 
@@ -3369,9 +3367,8 @@ static SpirvOperand checkHLSLSpirvTypeOperand(Sema &SemaRef,
   if (auto *RT = OperandArg->getAsCanonical<RecordType>()) {
     bool Literal = false;
     SourceLocation LiteralLoc;
-    if (isInVkNamespace(RT) && RT->getOriginalDecl()->getName() == "Literal") {
-      auto SpecDecl =
-          dyn_cast<ClassTemplateSpecializationDecl>(RT->getOriginalDecl());
+    if (isInVkNamespace(RT) && RT->getDecl()->getName() == "Literal") {
+      auto SpecDecl = dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl());
       assert(SpecDecl);
 
       const TemplateArgumentList &LiteralArgs = SpecDecl->getTemplateArgs();
@@ -3382,9 +3379,8 @@ static SpirvOperand checkHLSLSpirvTypeOperand(Sema &SemaRef,
     }
 
     if (RT && isInVkNamespace(RT) &&
-        RT->getOriginalDecl()->getName() == "integral_constant") {
-      auto SpecDecl =
-          dyn_cast<ClassTemplateSpecializationDecl>(RT->getOriginalDecl());
+        RT->getDecl()->getName() == "integral_constant") {
+      auto SpecDecl = dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl());
       assert(SpecDecl);
 
       const TemplateArgumentList &ConstantArgs = SpecDecl->getTemplateArgs();
@@ -4110,7 +4106,7 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK,
 
   // Check the tag kind
   if (const RecordType *RT = Result->getAs<RecordType>()) {
-    RecordDecl *D = RT->getOriginalDecl();
+    RecordDecl *D = RT->getDecl();
 
     IdentifierInfo *Id = D->getIdentifier();
     assert(Id && "templated class must have an identifier");
@@ -6383,11 +6379,11 @@ bool UnnamedLocalNoLinkageFinder::VisitDeducedTemplateSpecializationType(
 }
 
 bool UnnamedLocalNoLinkageFinder::VisitRecordType(const RecordType* T) {
-  return VisitTagDecl(T->getOriginalDecl()->getDefinitionOrSelf());
+  return VisitTagDecl(T->getDecl()->getDefinitionOrSelf());
 }
 
 bool UnnamedLocalNoLinkageFinder::VisitEnumType(const EnumType* T) {
-  return VisitTagDecl(T->getOriginalDecl()->getDefinitionOrSelf());
+  return VisitTagDecl(T->getDecl()->getDefinitionOrSelf());
 }
 
 bool UnnamedLocalNoLinkageFinder::VisitTemplateTypeParmType(
@@ -6412,7 +6408,7 @@ bool UnnamedLocalNoLinkageFinder::VisitTemplateSpecializationType(
 
 bool UnnamedLocalNoLinkageFinder::VisitInjectedClassNameType(
                                               const InjectedClassNameType* T) {
-  return VisitTagDecl(T->getOriginalDecl()->getDefinitionOrSelf());
+  return VisitTagDecl(T->getDecl()->getDefinitionOrSelf());
 }
 
 bool UnnamedLocalNoLinkageFinder::VisitDependentNameType(
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 3baa977..6964242 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -5577,7 +5577,7 @@ static TemplateDeductionResult CheckDeductionConsistency(
   bool IsDeductionGuide = isa<CXXDeductionGuideDecl>(FTD->getTemplatedDecl());
   if (IsDeductionGuide) {
     if (auto *Injected = P->getAsCanonical<InjectedClassNameType>())
-      P = Injected->getOriginalDecl()->getCanonicalTemplateSpecializationType(
+      P = Injected->getDecl()->getCanonicalTemplateSpecializationType(
           S.Context);
   }
   QualType InstP = S.SubstType(P.getCanonicalType(), MLTAL, FTD->getLocation(),
@@ -5598,10 +5598,10 @@ static TemplateDeductionResult CheckDeductionConsistency(
   auto T2 = S.Context.getUnqualifiedArrayType(A.getNonReferenceType());
   if (IsDeductionGuide) {
     if (auto *Injected = T1->getAsCanonical<InjectedClassNameType>())
-      T1 = Injected->getOriginalDecl()->getCanonicalTemplateSpecializationType(
+      T1 = Injected->getDecl()->getCanonicalTemplateSpecializationType(
           S.Context);
     if (auto *Injected = T2->getAsCanonical<InjectedClassNameType>())
-      T2 = Injected->getOriginalDecl()->getCanonicalTemplateSpecializationType(
+      T2 = Injected->getDecl()->getCanonicalTemplateSpecializationType(
           S.Context);
   }
   if (!S.Context.hasSameType(T1, T2))
@@ -6973,7 +6973,7 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T,
 
   case Type::InjectedClassName:
     T = cast<InjectedClassNameType>(T)
-            ->getOriginalDecl()
+            ->getDecl()
             ->getCanonicalTemplateSpecializationType(Ctx);
     [[fallthrough]];
 
diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
index 8ba23aa..ad50600 100644
--- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
+++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -996,7 +996,7 @@ getRHSTemplateDeclAndArgs(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate) {
     // dependent. e.g.
     //   using AliasFoo = Foo<bool>;
     if (const auto *CTSD =
-            dyn_cast<ClassTemplateSpecializationDecl>(RT->getOriginalDecl())) {
+            dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl())) {
       Template = CTSD->getSpecializedTemplate();
       AliasRhsTemplateArgs = CTSD->getTemplateArgs().asArray();
     }
@@ -1054,12 +1054,11 @@ BuildDeductionGuideForTypeAlias(Sema &SemaRef,
   // such that T can be deduced as U.
   auto RType = F->getTemplatedDecl()->getReturnType();
   // The (trailing) return type of the deduction guide.
-  const TemplateSpecializationType *FReturnType =
-      RType->getAs<TemplateSpecializationType>();
+  const auto *FReturnType = RType->getAs<TemplateSpecializationType>();
   if (const auto *ICNT = RType->getAsCanonical<InjectedClassNameType>())
     // implicitly-generated deduction guide.
     FReturnType = cast<TemplateSpecializationType>(
-        ICNT->getOriginalDecl()->getCanonicalTemplateSpecializationType(
+        ICNT->getDecl()->getCanonicalTemplateSpecializationType(
             SemaRef.Context));
   assert(FReturnType && "expected to see a return type");
   // Deduce template arguments of the deduction guide f from the RHS of
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 7b05e4c..bec2820 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1681,7 +1681,7 @@ namespace {
           ICNT && SemaRef.CodeSynthesisContexts.back().Kind ==
                       Sema::CodeSynthesisContext::BuildingDeductionGuides) {
         Type = inherited::TransformType(
-            ICNT->getOriginalDecl()->getCanonicalTemplateSpecializationType(
+            ICNT->getDecl()->getCanonicalTemplateSpecializationType(
                 SemaRef.Context));
         TLB.pushTrivial(SemaRef.Context, Type, TL.getNameLoc());
       }
@@ -2105,7 +2105,7 @@ TemplateInstantiator::TransformFirstQualifierInScope(NamedDecl *D,
         return cast_or_null<NamedDecl>(TransformDecl(Loc, D));
 
       if (const TagType *Tag = T->getAs<TagType>())
-        return Tag->getOriginalDecl();
+        return Tag->getDecl();
 
       // The resulting type is not a tag; complain.
       getSema().Diag(Loc, diag::err_nested_name_spec_non_tag) << T;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 73fd33a..468bc1d 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1522,9 +1522,9 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
   // If the old typedef was the name for linkage purposes of an anonymous
   // tag decl, re-establish that relationship for the new typedef.
   if (const TagType *oldTagType = D->getUnderlyingType()->getAs<TagType>()) {
-    TagDecl *oldTag = oldTagType->getOriginalDecl();
+    TagDecl *oldTag = oldTagType->getDecl();
     if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) {
-      TagDecl *newTag = DI->getType()->castAs<TagType>()->getOriginalDecl();
+      TagDecl *newTag = DI->getType()->castAs<TagType>()->getDecl();
       assert(!newTag->hasNameForLinkage());
       newTag->setTypedefNameForAnonDecl(Typedef);
     }
@@ -5791,7 +5791,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
       QualType TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) {
         const RecordType *T = TL.getTypePtr();
         RecordDecl *Record = cast_or_null<RecordDecl>(
-            getDerived().TransformDecl(TL.getNameLoc(), T->getOriginalDecl()));
+            getDerived().TransformDecl(TL.getNameLoc(), T->getDecl()));
         if (Record != OldDecl)
           return Base::TransformRecordType(TLB, TL);
 
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index a9e7c34..638904d 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1238,8 +1238,8 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
     Result = S.GetTypeFromParser(DS.getRepAsType());
     assert(!Result.isNull() && "Didn't get a type for typeof?");
     if (!Result->isDependentType())
-      if (const TagType *TT = Result->getAs<TagType>())
-        S.DiagnoseUseOfDecl(TT->getOriginalDecl(), DS.getTypeSpecTypeLoc());
+      if (const auto *TT = Result->getAs<TagType>())
+        S.DiagnoseUseOfDecl(TT->getDecl(), DS.getTypeSpecTypeLoc());
     // TypeQuals handled by caller.
     Result = Context.getTypeOfType(
         Result, DS.getTypeSpecType() == DeclSpec::TST_typeof_unqualType
@@ -9699,7 +9699,7 @@ QualType Sema::BuildTypeofExprType(Expr *E, TypeOfKind Kind) {
   if (!E->isTypeDependent()) {
     QualType T = E->getType();
     if (const TagType *TT = T->getAs<TagType>())
-      DiagnoseUseOfDecl(TT->getOriginalDecl(), E->getExprLoc());
+      DiagnoseUseOfDecl(TT->getDecl(), E->getExprLoc());
   }
   return Context.getTypeOfExprType(E, Kind);
 }
@@ -9865,7 +9865,7 @@ QualType Sema::BuildPackIndexingType(QualType Pattern, Expr *IndexExpr,
 static QualType GetEnumUnderlyingType(Sema &S, QualType BaseType,
                                       SourceLocation Loc) {
   assert(BaseType->isEnumeralType());
-  EnumDecl *ED = BaseType->castAs<EnumType>()->getOriginalDecl();
+  EnumDecl *ED = BaseType->castAs<EnumType>()->getDecl();
 
   S.DiagnoseUseOfDecl(ED, Loc);
 
diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp
index 3e34675..3887796 100644
--- a/clang/lib/Sema/SemaTypeTraits.cpp
+++ b/clang/lib/Sema/SemaTypeTraits.cpp
@@ -1076,8 +1076,7 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
     if (T.isPODType(C) || T->isObjCLifetimeType())
       return true;
     if (CXXRecordDecl *RD = C.getBaseElementType(T)->getAsCXXRecordDecl()) {
-      if (RD->hasTrivialDefaultConstructor() &&
-          !RD->hasNonTrivialDefaultConstructor())
+      if (RD->hasTrivialDefaultConstructor())
         return true;
 
       bool FoundConstructor = false;
@@ -1165,14 +1164,26 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
     const CXXDestructorDecl *Dtor = RD->getDestructor();
     if (UnqualT->isAggregateType() && (!Dtor || !Dtor->isUserProvided()))
       return true;
-    if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) {
-      for (CXXConstructorDecl *Ctr : RD->ctors()) {
-        if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted())
-          continue;
-        if (Ctr->isTrivial())
-          return true;
-      }
+    bool HasTrivialNonDeletedDtr =
+        RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted());
+    if (!HasTrivialNonDeletedDtr)
+      return false;
+    for (CXXConstructorDecl *Ctr : RD->ctors()) {
+      if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted())
+        continue;
+      if (Ctr->isTrivial())
+        return true;
     }
+    if (RD->needsImplicitDefaultConstructor() &&
+        RD->hasTrivialDefaultConstructor() &&
+        !RD->hasNonTrivialDefaultConstructor())
+      return true;
+    if (RD->needsImplicitCopyConstructor() && RD->hasTrivialCopyConstructor() &&
+        !RD->defaultedCopyConstructorIsDeleted())
+      return true;
+    if (RD->needsImplicitMoveConstructor() && RD->hasTrivialMoveConstructor() &&
+        !RD->defaultedMoveConstructorIsDeleted())
+      return true;
     return false;
   }
   case UTT_IsIntangibleType:
@@ -1613,9 +1624,9 @@ bool Sema::BuiltinIsBaseOf(SourceLocation RhsTLoc, QualType LhsT,
 
   // Unions are never base classes, and never have base classes.
   // It doesn't matter if they are complete or not. See PR#41843
-  if (lhsRecord && lhsRecord->getOriginalDecl()->isUnion())
+  if (lhsRecord && lhsRecord->getDecl()->isUnion())
     return false;
-  if (rhsRecord && rhsRecord->getOriginalDecl()->isUnion())
+  if (rhsRecord && rhsRecord->getDecl()->isUnion())
     return false;
 
   if (lhsRecord == rhsRecord)
@@ -1629,8 +1640,8 @@ bool Sema::BuiltinIsBaseOf(SourceLocation RhsTLoc, QualType LhsT,
                           diag::err_incomplete_type_used_in_type_trait_expr))
     return false;
 
-  return cast<CXXRecordDecl>(rhsRecord->getOriginalDecl())
-      ->isDerivedFrom(cast<CXXRecordDecl>(lhsRecord->getOriginalDecl()));
+  return cast<CXXRecordDecl>(rhsRecord->getDecl())
+      ->isDerivedFrom(cast<CXXRecordDecl>(lhsRecord->getDecl()));
 }
 
 static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT,
@@ -1670,9 +1681,8 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT,
                                  diag::err_incomplete_type))
       return false;
 
-    return cast<CXXRecordDecl>(DerivedRecord->getOriginalDecl())
-        ->isVirtuallyDerivedFrom(
-            cast<CXXRecordDecl>(BaseRecord->getOriginalDecl()));
+    return cast<CXXRecordDecl>(DerivedRecord->getDecl())
+        ->isVirtuallyDerivedFrom(cast<CXXRecordDecl>(BaseRecord->getDecl()));
   }
   case BTT_IsSame:
     return Self.Context.hasSameType(LhsT, RhsT);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 04a5e4b..86896ab 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -7160,13 +7160,13 @@ QualType TreeTransform<Derived>::TransformTagType(TypeLocBuilder &TLB,
   }
 
   auto *TD = cast_or_null<TagDecl>(
-      getDerived().TransformDecl(TL.getNameLoc(), T->getOriginalDecl()));
+      getDerived().TransformDecl(TL.getNameLoc(), T->getDecl()));
   if (!TD)
     return QualType();
 
   QualType Result = TL.getType();
   if (getDerived().AlwaysRebuild() || QualifierLoc != TL.getQualifierLoc() ||
-      TD != T->getOriginalDecl()) {
+      TD != T->getDecl()) {
     if (T->isCanonicalUnqualified())
       Result = getDerived().RebuildCanonicalTagType(TD);
     else
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 32f7a0e..8b3fd41 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5504,7 +5504,7 @@ void ASTReader::InitializeContext() {
             Error("Invalid FILE type in AST file");
             return;
           }
-          Context.setFILEDecl(Tag->getOriginalDecl());
+          Context.setFILEDecl(Tag->getDecl());
         }
       }
     }
@@ -5525,7 +5525,7 @@ void ASTReader::InitializeContext() {
             Error("Invalid jmp_buf type in AST file");
             return;
           }
-          Context.setjmp_bufDecl(Tag->getOriginalDecl());
+          Context.setjmp_bufDecl(Tag->getDecl());
         }
       }
     }
@@ -5543,7 +5543,7 @@ void ASTReader::InitializeContext() {
         else {
           const TagType *Tag = Sigjmp_bufType->getAs<TagType>();
           assert(Tag && "Invalid sigjmp_buf type in AST file");
-          Context.setsigjmp_bufDecl(Tag->getOriginalDecl());
+          Context.setsigjmp_bufDecl(Tag->getDecl());
         }
       }
     }
@@ -5578,7 +5578,7 @@ void ASTReader::InitializeContext() {
         else {
           const TagType *Tag = Ucontext_tType->getAs<TagType>();
           assert(Tag && "Invalid ucontext_t type in AST file");
-          Context.setucontext_tDecl(Tag->getOriginalDecl());
+          Context.setucontext_tDecl(Tag->getDecl());
         }
       }
     }
diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp
index 3e8ffea..353e8a2 100644
--- a/clang/lib/Serialization/TemplateArgumentHasher.cpp
+++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp
@@ -358,7 +358,7 @@ public:
     AddQualType(T->getReplacementType());
   }
 
-  void VisitTagType(const TagType *T) { AddDecl(T->getOriginalDecl()); }
+  void VisitTagType(const TagType *T) { AddDecl(T->getDecl()); }
 
   void VisitRecordType(const RecordType *T) { VisitTagType(T); }
   void VisitEnumType(const EnumType *T) { VisitTagType(T); }
diff --git a/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
index b304350..7cc146e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
@@ -246,7 +246,7 @@ public:
   bool Find(const TypedValueRegion *R) {
     QualType T = R->getValueType();
     if (const RecordType *RT = T->getAsStructureType()) {
-      const RecordDecl *RD = RT->getOriginalDecl()->getDefinition();
+      const RecordDecl *RD = RT->getDecl()->getDefinition();
       assert(RD && "Referred record has no definition");
       for (const auto *I : RD->fields()) {
         if (I->isUnnamedBitField())
diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
index 17af1ae..5e75c1c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
@@ -154,15 +154,15 @@ void WalkAST::VisitCallExpr(CallExpr *CE) {
           .Case("mkstemp", &WalkAST::checkCall_mkstemp)
           .Case("mkdtemp", &WalkAST::checkCall_mkstemp)
           .Case("mkstemps", &WalkAST::checkCall_mkstemp)
-          .Cases("strcpy", "__strcpy_chk", &WalkAST::checkCall_strcpy)
-          .Cases("strcat", "__strcat_chk", &WalkAST::checkCall_strcat)
-          .Cases("sprintf", "vsprintf", "scanf", "wscanf", "fscanf", "fwscanf",
-                 "vscanf", "vwscanf", "vfscanf", "vfwscanf",
+          .Cases({"strcpy", "__strcpy_chk"}, &WalkAST::checkCall_strcpy)
+          .Cases({"strcat", "__strcat_chk"}, &WalkAST::checkCall_strcat)
+          .Cases({"sprintf", "vsprintf", "scanf", "wscanf", "fscanf", "fwscanf",
+                  "vscanf", "vwscanf", "vfscanf", "vfwscanf"},
                  &WalkAST::checkDeprecatedOrUnsafeBufferHandling)
-          .Cases("sscanf", "swscanf", "vsscanf", "vswscanf", "swprintf",
-                 "snprintf", "vswprintf", "vsnprintf", "memcpy", "memmove",
+          .Cases({"sscanf", "swscanf", "vsscanf", "vswscanf", "swprintf",
+                  "snprintf", "vswprintf", "vsnprintf", "memcpy", "memmove"},
                  &WalkAST::checkDeprecatedOrUnsafeBufferHandling)
-          .Cases("strncpy", "strncat", "memset", "fprintf",
+          .Cases({"strncpy", "strncat", "memset", "fprintf"},
                  &WalkAST::checkDeprecatedOrUnsafeBufferHandling)
           .Case("drand48", &WalkAST::checkCall_rand)
           .Case("erand48", &WalkAST::checkCall_rand)
@@ -766,12 +766,14 @@ void WalkAST::checkDeprecatedOrUnsafeBufferHandling(const CallExpr *CE,
 
   int ArgIndex =
       llvm::StringSwitch<int>(Name)
-          .Cases("scanf", "wscanf", "vscanf", "vwscanf", 0)
-          .Cases("fscanf", "fwscanf", "vfscanf", "vfwscanf", "sscanf",
-                 "swscanf", "vsscanf", "vswscanf", 1)
-          .Cases("sprintf", "vsprintf", "fprintf", 1)
-          .Cases("swprintf", "snprintf", "vswprintf", "vsnprintf", "memcpy",
-                 "memmove", "memset", "strncpy", "strncat", DEPR_ONLY)
+          .Cases({"scanf", "wscanf", "vscanf", "vwscanf"}, 0)
+          .Cases({"fscanf", "fwscanf", "vfscanf", "vfwscanf", "sscanf",
+                  "swscanf", "vsscanf", "vswscanf"},
+                 1)
+          .Cases({"sprintf", "vsprintf", "fprintf"}, 1)
+          .Cases({"swprintf", "snprintf", "vswprintf", "vsnprintf", "memcpy",
+                  "memmove", "memset", "strncpy", "strncat"},
+                 DEPR_ONLY)
           .Default(UNKNOWN_CALL);
 
   assert(ArgIndex != UNKNOWN_CALL && "Unsupported function");
diff --git a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
index b1a7cd7..bc67391 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
@@ -148,9 +148,8 @@ void NonNullParamChecker::checkPreCall(const CallEvent &Call,
 
       QualType T = ArgE->getType();
       const RecordType *UT = T->getAsUnionType();
-      if (!UT || !UT->getOriginalDecl()
-                      ->getMostRecentDecl()
-                      ->hasAttr<TransparentUnionAttr>())
+      if (!UT ||
+          !UT->getDecl()->getMostRecentDecl()->hasAttr<TransparentUnionAttr>())
         continue;
 
       auto CSV = DV->getAs<nonloc::CompoundVal>();
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index c1a5000..419d263 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -164,7 +164,9 @@ bool tryToFindPtrOrigin(
 
         auto Name = safeGetName(callee);
         if (Name == "__builtin___CFStringMakeConstantString" ||
-            Name == "NSClassFromString")
+            Name == "NSStringFromSelector" || Name == "NSSelectorFromString" ||
+            Name == "NSStringFromClass" || Name == "NSClassFromString" ||
+            Name == "NSStringFromProtocol" || Name == "NSProtocolFromString")
           return callback(E, true);
       } else if (auto *CalleeE = call->getCallee()) {
         if (auto *E = dyn_cast<DeclRefExpr>(CalleeE->IgnoreParenCasts())) {
@@ -182,7 +184,7 @@ bool tryToFindPtrOrigin(
           if (auto *Subst = dyn_cast<SubstTemplateTypeParmType>(RetType)) {
             if (auto *SubstType = Subst->desugar().getTypePtr()) {
               if (auto *RD = dyn_cast<RecordType>(SubstType)) {
-                if (auto *CXX = dyn_cast<CXXRecordDecl>(RD->getOriginalDecl()))
+                if (auto *CXX = dyn_cast<CXXRecordDecl>(RD->getDecl()))
                   if (isSafePtr(CXX))
                     return callback(E, true);
               }
@@ -202,6 +204,8 @@ bool tryToFindPtrOrigin(
           !Selector.getNumArgs())
         return callback(E, true);
     }
+    if (auto *ObjCProtocol = dyn_cast<ObjCProtocolExpr>(E))
+      return callback(ObjCProtocol, true);
     if (auto *ObjCDict = dyn_cast<ObjCDictionaryLiteral>(E))
       return callback(ObjCDict, true);
     if (auto *ObjCArray = dyn_cast<ObjCArrayLiteral>(E))
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp
index d8539ea..1d4e6dd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ForwardDeclChecker.cpp
@@ -263,18 +263,43 @@ public:
   void visitCallArg(const Expr *Arg, const ParmVarDecl *Param,
                     const Decl *DeclWithIssue) const {
     auto *ArgExpr = Arg->IgnoreParenCasts();
-    if (auto *InnerCE = dyn_cast<CallExpr>(Arg)) {
-      auto *InnerCallee = InnerCE->getDirectCallee();
-      if (InnerCallee && InnerCallee->isInStdNamespace() &&
-          safeGetName(InnerCallee) == "move" && InnerCE->getNumArgs() == 1) {
-        ArgExpr = InnerCE->getArg(0);
-        if (ArgExpr)
-          ArgExpr = ArgExpr->IgnoreParenCasts();
+    while (ArgExpr) {
+      ArgExpr = ArgExpr->IgnoreParenCasts();
+      if (auto *InnerCE = dyn_cast<CallExpr>(ArgExpr)) {
+        auto *InnerCallee = InnerCE->getDirectCallee();
+        if (InnerCallee && InnerCallee->isInStdNamespace() &&
+            safeGetName(InnerCallee) == "move" && InnerCE->getNumArgs() == 1) {
+          ArgExpr = InnerCE->getArg(0);
+          continue;
+        }
+      }
+      if (auto *UO = dyn_cast<UnaryOperator>(ArgExpr)) {
+        auto OpCode = UO->getOpcode();
+        if (OpCode == UO_Deref || OpCode == UO_AddrOf) {
+          ArgExpr = UO->getSubExpr();
+          continue;
+        }
       }
+      break;
     }
+
+    if (auto *MemberCallExpr = dyn_cast<CXXMemberCallExpr>(ArgExpr)) {
+      if (isOwnerPtrType(MemberCallExpr->getObjectType()))
+        return;
+    }
+
+    if (auto *OpCE = dyn_cast<CXXOperatorCallExpr>(ArgExpr)) {
+      auto *Method = dyn_cast_or_null<CXXMethodDecl>(OpCE->getDirectCallee());
+      if (Method && isOwnerPtr(safeGetName(Method->getParent()))) {
+        if (OpCE->getOperator() == OO_Star && OpCE->getNumArgs() == 1)
+          return;
+      }
+    }
+
     if (isNullPtr(ArgExpr) || isa<IntegerLiteral>(ArgExpr) ||
         isa<CXXDefaultArgExpr>(ArgExpr))
       return;
+
     if (auto *DRE = dyn_cast<DeclRefExpr>(ArgExpr)) {
       if (auto *ValDecl = DRE->getDecl()) {
         if (isa<ParmVarDecl>(ValDecl))
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index e5c74bb..d3d1f13 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -138,6 +138,11 @@ bool isCheckedPtr(const std::string &Name) {
   return Name == "CheckedPtr" || Name == "CheckedRef";
 }
 
+bool isOwnerPtr(const std::string &Name) {
+  return isRefType(Name) || isCheckedPtr(Name) || Name == "unique_ptr" ||
+         Name == "UniqueRef" || Name == "LazyUniqueRef";
+}
+
 bool isSmartPtrClass(const std::string &Name) {
   return isRefType(Name) || isCheckedPtr(Name) || isRetainPtrOrOSPtr(Name) ||
          Name == "WeakPtr" || Name == "WeakPtrFactory" ||
@@ -206,10 +211,7 @@ bool isRetainPtrOrOSPtrType(const clang::QualType T) {
 }
 
 bool isOwnerPtrType(const clang::QualType T) {
-  return isPtrOfType(T, [](auto Name) {
-    return isRefType(Name) || isCheckedPtr(Name) || Name == "unique_ptr" ||
-           Name == "UniqueRef" || Name == "LazyUniqueRef";
-  });
+  return isPtrOfType(T, [](auto Name) { return isOwnerPtr(Name); });
 }
 
 std::optional<bool> isUncounted(const QualType T) {
@@ -255,7 +257,7 @@ void RetainTypeChecker::visitTypedef(const TypedefDecl *TD) {
     return;
   }
 
-  for (auto *Redecl : RT->getOriginalDecl()->getMostRecentDecl()->redecls()) {
+  for (auto *Redecl : RT->getDecl()->getMostRecentDecl()->redecls()) {
     if (Redecl->getAttr<ObjCBridgeAttr>() ||
         Redecl->getAttr<ObjCBridgeMutableAttr>()) {
       CFPointees.insert(RT);
@@ -296,7 +298,7 @@ std::optional<bool> isUnretained(const QualType T, bool IsARCEnabled) {
   auto *Record = PointeeType->getAsStructureType();
   if (!Record)
     return false;
-  auto *Decl = Record->getOriginalDecl();
+  auto *Decl = Record->getDecl();
   if (!Decl)
     return false;
   auto TypeName = Decl->getName();
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
index 8300a6c..12e2e2d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
@@ -143,6 +143,10 @@ bool isCheckedPtr(const std::string &Name);
 /// \returns true if \p Name is RetainPtr or its variant, false if not.
 bool isRetainPtrOrOSPtr(const std::string &Name);
 
+/// \returns true if \p Name is an owning smar pointer such as Ref, CheckedPtr,
+/// and unique_ptr.
+bool isOwnerPtr(const std::string &Name);
+
 /// \returns true if \p Name is a smart pointer type name, false if not.
 bool isSmartPtrClass(const std::string &Name);
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
index 6f3a280..c6421f8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
@@ -121,13 +121,13 @@ public:
                 return true;
             }
           } else if (auto *RD = dyn_cast<RecordType>(PointeeType)) {
-            if (declaresSameEntity(RD->getOriginalDecl(), ClassDecl))
+            if (declaresSameEntity(RD->getDecl(), ClassDecl))
               return true;
           } else if (auto *ST =
                          dyn_cast<SubstTemplateTypeParmType>(PointeeType)) {
             auto Type = ST->getReplacementType();
             if (auto *RD = dyn_cast<RecordType>(Type)) {
-              if (declaresSameEntity(RD->getOriginalDecl(), ClassDecl))
+              if (declaresSameEntity(RD->getDecl(), ClassDecl))
                 return true;
             }
           }
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 06ba015..62460cc 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -89,7 +89,7 @@ static bool isCallback(QualType T) {
     T = T->getPointeeType();
 
   if (const RecordType *RT = T->getAsStructureType()) {
-    const RecordDecl *RD = RT->getOriginalDecl()->getDefinitionOrSelf();
+    const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
     for (const auto *I : RD->fields()) {
       QualType FieldT = I->getType();
       if (FieldT->isBlockPointerType() || FieldT->isFunctionPointerType())
@@ -391,9 +391,8 @@ bool CallEvent::isVariadic(const Decl *D) {
 
 static bool isTransparentUnion(QualType T) {
   const RecordType *UT = T->getAsUnionType();
-  return UT && UT->getOriginalDecl()
-                   ->getMostRecentDecl()
-                   ->hasAttr<TransparentUnionAttr>();
+  return UT &&
+         UT->getDecl()->getMostRecentDecl()->hasAttr<TransparentUnionAttr>();
 }
 
 // In some cases, symbolic cases should be transformed before we associate
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index af0ef52..2838533 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -2457,7 +2457,7 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
 SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
                                              const TypedValueRegion *R) {
   const RecordDecl *RD =
-      R->getValueType()->castAsCanonical<RecordType>()->getOriginalDecl();
+      R->getValueType()->castAsCanonical<RecordType>()->getDecl();
   if (!RD->getDefinition())
     return UnknownVal();
 
diff --git a/clang/test/Analysis/Checkers/WebKit/forward-decl-checker.mm b/clang/test/Analysis/Checkers/WebKit/forward-decl-checker.mm
index 104b555..8aad838 100644
--- a/clang/test/Analysis/Checkers/WebKit/forward-decl-checker.mm
+++ b/clang/test/Analysis/Checkers/WebKit/forward-decl-checker.mm
@@ -11,6 +11,8 @@ class Obj;
 
 Obj* provide_obj_ptr();
 void receive_obj_ptr(Obj* p = nullptr);
+void receive_obj_ref(Obj&);
+void receive_obj_rref(Obj&&);
 sqlite3* open_db();
 void close_db(sqlite3*);
 
@@ -38,6 +40,16 @@ Obj& ref() {
   return obj;
 }
 
+void opaque_call_arg(Obj* obj, Obj&& otherObj, const RefPtr<Obj>& safeObj, WeakPtr<Obj> weakObj, std::unique_ptr<Obj>& uniqObj) {
+  receive_obj_ref(*obj);
+  receive_obj_ptr(&*obj);
+  receive_obj_rref(std::move(otherObj));
+  receive_obj_ref(*safeObj.get());
+  receive_obj_ptr(weakObj.get());
+  // expected-warning@-1{{Call argument for parameter 'p' uses a forward declared type 'Obj *'}}
+  receive_obj_ref(*uniqObj);
+}
+
 Obj&& provide_obj_rval();
 void receive_obj_rval(Obj&& p);
 
diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h
index a49faa1..7055a94 100644
--- a/clang/test/Analysis/Checkers/WebKit/mock-types.h
+++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h
@@ -25,23 +25,23 @@ namespace std {
 template <typename T>
 class unique_ptr {
 private:
-  T *t;
+  void *t;
 
 public:
   unique_ptr() : t(nullptr) { }
   unique_ptr(T *t) : t(t) { }
   ~unique_ptr() {
     if (t)
-      delete t;
+      delete static_cast<T*>(t);
   }
   template <typename U> unique_ptr(unique_ptr<U>&& u)
     : t(u.t)
   {
     u.t = nullptr;
   }
-  T *get() const { return t; }
-  T *operator->() const { return t; }
-  T &operator*() const { return *t; }
+  T *get() const { return static_cast<T*>(t); }
+  T *operator->() const { return get(); }
+  T &operator*() const { return *get(); }
   unique_ptr &operator=(T *) { return *this; }
   explicit operator bool() const { return !!t; }
 };
@@ -313,4 +313,90 @@ public:
   UniqueRef &operator=(T &) { return *this; }
 };
 
+class WeakPtrImpl {
+private:
+  void* ptr { nullptr };
+  mutable unsigned m_refCount { 0 };
+
+  template <typename U> friend class CanMakeWeakPtr;
+  template <typename U> friend class WeakPtr;
+
+public:
+  template <typename T>
+  static Ref<WeakPtrImpl> create(T& t)
+  {
+    return adoptRef(*new WeakPtrImpl(t));
+  }
+
+  void ref() const { m_refCount++; }
+  void deref() const {
+    m_refCount--;
+    if (!m_refCount)
+      delete const_cast<WeakPtrImpl*>(this);
+  }
+
+  template <typename T>
+  T* get() { return static_cast<T*>(ptr); }
+  operator bool() const { return !!ptr; }
+  void clear() { ptr = nullptr; }
+
+private:
+  template <typename T>
+  WeakPtrImpl(T* t)
+    : ptr(static_cast<void*>(t))
+  { }
+};
+
+template <typename T>
+class CanMakeWeakPtr {
+private:
+  RefPtr<WeakPtrImpl> impl;
+
+  template <typename U> friend class CanMakeWeakPtr;
+  template <typename U> friend class WeakPtr;
+
+  Ref<WeakPtrImpl> createWeakPtrImpl() {
+    if (!impl)
+      impl = WeakPtrImpl::create(static_cast<T>(*this));
+    return *impl;
+  }
+
+public:
+  ~CanMakeWeakPtr() {
+    if (!impl)
+      return;
+    impl->clear();
+    impl = nullptr;
+  }
+};
+
+template <typename T>
+class WeakPtr {
+private:
+  RefPtr<WeakPtrImpl> impl;
+
+public:
+  WeakPtr(T& t) {
+    *this = t;
+  }
+  WeakPtr(T* t) {
+    *this = t;
+  }
+
+  template <typename U>
+  WeakPtr<T> operator=(U& obj) {
+    impl = obj.createWeakPtrImpl();
+  }
+
+  template <typename U>
+  WeakPtr<T> operator=(U* obj) {
+    impl = obj ? obj->createWeakPtrImpl() : nullptr;
+  }
+
+  T* get() {
+    return impl ? impl->get<T>() : nullptr;
+  }
+
+};
+
 #endif
diff --git a/clang/test/Analysis/Checkers/WebKit/objc-mock-types.h b/clang/test/Analysis/Checkers/WebKit/objc-mock-types.h
index a5fc3d7..edf4011 100644
--- a/clang/test/Analysis/Checkers/WebKit/objc-mock-types.h
+++ b/clang/test/Analysis/Checkers/WebKit/objc-mock-types.h
@@ -98,12 +98,20 @@ typedef CVImageBufferRef CVPixelBufferRef;
 typedef signed int CVReturn;
 CVReturn CVPixelBufferCreateWithIOSurface(CFAllocatorRef allocator, IOSurfaceRef surface, CFDictionaryRef pixelBufferAttributes, CF_RETURNS_RETAINED CVPixelBufferRef * pixelBufferOut);
 
+extern "C" NSString *NSStringFromSelector(SEL aSelector);
+extern "C" SEL NSSelectorFromString(NSString *aSelectorName);
+
+extern "C" NSString *NSStringFromClass(Class aClass);
+extern "C" Class NSClassFromString(NSString *aClassName);
+
+extern "C" NSString *NSStringFromProtocol(Protocol *proto);
+extern "C" Protocol * NSProtocolFromString(NSString *namestr);
+
 CFRunLoopRef CFRunLoopGetCurrent(void);
 CFRunLoopRef CFRunLoopGetMain(void);
 extern CFTypeRef CFRetain(CFTypeRef cf);
 extern void CFRelease(CFTypeRef cf);
 #define CFSTR(cStr) ((CFStringRef) __builtin___CFStringMakeConstantString ("" cStr ""))
-extern Class NSClassFromString(NSString *aClassName);
 
 #if __has_feature(objc_arc)
 id CFBridgingRelease(CFTypeRef X) {
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
index 5dc3b38..4f231ee 100644
--- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
@@ -578,6 +578,24 @@ void foo() {
 
 } // autoreleased
 
+namespace sel_string {
+
+void consumeStr(NSString *);
+void consumeSel(SEL);
+void consumeClass(Class);
+void consumeProtocol(Protocol *);
+
+void foo() {
+  consumeStr(NSStringFromSelector(@selector(mutableCopy)));
+  consumeSel(NSSelectorFromString(@"mutableCopy"));
+  consumeStr(NSStringFromClass(NSNumber.class));
+  consumeClass(NSClassFromString(@"NSNumber"));
+  consumeStr(NSStringFromProtocol(@protocol(NSCopying)));
+  consumeProtocol(NSProtocolFromString(@"NSCopying"));
+}
+
+} // namespace sel_string
+
 @interface TestObject : NSObject
 - (void)doWork:(NSString *)msg, ...;
 - (void)doWorkOnSelf;
diff --git a/clang/test/C/C2y/n3364.c b/clang/test/C/C2y/n3364.c
index d75f17d..f95c77f 100644
--- a/clang/test/C/C2y/n3364.c
+++ b/clang/test/C/C2y/n3364.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -verify -std=c2y -ffreestanding -Wall -pedantic -emit-llvm -o - %s
-// RUN: %clang_cc1 -verify -ffreestanding -Wall -pedantic -emit-llvm -o - %s
+// RUN: %clang_cc1 -verify -std=c2y -ffreestanding -Wall -pedantic -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -verify -ffreestanding -Wall -pedantic -emit-llvm -o - %s | FileCheck %s
 // expected-no-diagnostics
 
 /* WG14 N3364: Yes
@@ -23,20 +23,20 @@
 float f1 = FLT_SNAN;
 float f2 = +FLT_SNAN;
 float f3 = -FLT_SNAN;
-// CHECK: @f1 = {{.*}}global float 0x7FF0000020000000
-// CHECK: @f2 = {{.*}}global float 0x7FF0000020000000
-// CHECK: @f3 = {{.*}}global float 0xFFF0000020000000
+// CHECK: @f1 = {{.*}}global float 0x7FF4000000000000
+// CHECK: @f2 = {{.*}}global float 0x7FF4000000000000
+// CHECK: @f3 = {{.*}}global float 0xFFF4000000000000
 
 double d1 = DBL_SNAN;
 double d2 = +DBL_SNAN;
 double d3 = -DBL_SNAN;
-// CHECK: @d1 = {{.*}}global double 0x7FF0000000000001
-// CHECK: @d2 = {{.*}}global double 0x7FF0000000000001
-// CHECK: @d3 = {{.*}}global double 0xFFF0000000000001
+// CHECK: @d1 = {{.*}}global double 0x7FF4000000000000
+// CHECK: @d2 = {{.*}}global double 0x7FF4000000000000
+// CHECK: @d3 = {{.*}}global double 0xFFF4000000000000
 
 long double ld1 = LDBL_SNAN;
 long double ld2 = +LDBL_SNAN;
 long double ld3 = -LDBL_SNAN;
-// CHECK: @ld1 = {{.*}}global {{double 0x7FF0000000000001|x86_fp80 0xK7FFF8000000000000001|fp128 0xL00000000000000017FFF000000000000}}
-// CHECK: @ld2 = {{.*}}global {{double 0x7FF0000000000001|x86_fp80 0xK7FFF8000000000000001|fp128 0xL00000000000000017FFF000000000000}}
-// CHECK: @ld3 = {{.*}}global {{double 0xFFF0000000000001|x86_fp80 0xKFFFF8000000000000001|fp128 0xL0000000000000001FFFF000000000000}}
+// CHECK: @ld1 = {{.*}}global {{double 0x7FF4000000000000|x86_fp80 0xK7FFFA000000000000000|fp128 0xL00000000000000007FFF400000000000}}
+// CHECK: @ld2 = {{.*}}global {{double 0x7FF4000000000000|x86_fp80 0xK7FFFA000000000000000|fp128 0xL00000000000000007FFF400000000000}}
+// CHECK: @ld3 = {{.*}}global {{double 0xFFF4000000000000|x86_fp80 0xKFFFFA000000000000000|fp128 0xL0000000000000000FFFF400000000000}}
diff --git a/clang/test/CIR/CodeGen/throws.cpp b/clang/test/CIR/CodeGen/throws.cpp
index ff6aa62..4255d43 100644
--- a/clang/test/CIR/CodeGen/throws.cpp
+++ b/clang/test/CIR/CodeGen/throws.cpp
@@ -123,3 +123,24 @@ void paren_expr() { (throw 0, 1 + 2); }
 // OGCG: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 4)
 // OGCG: store i32 0, ptr %[[EXCEPTION_ADDR]], align 16
 // OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIi, ptr null)
+
+void throw_complex_expr() {
+  throw __builtin_complex(1.1f, 2.2f);
+}
+
+// CIR: %[[EXCEPTION_ADDR:.*]] = cir.alloc.exception 8 -> !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[EXCEPTION_VALUE:.*]] = cir.const #cir.const_complex<#cir.fp<1.100000e+00> : !cir.float, #cir.fp<2.200000e+00> : !cir.float> : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[EXCEPTION_VALUE]], %[[EXCEPTION_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CIR: cir.throw %[[EXCEPTION_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, @_ZTICf
+// CIR: cir.unreachable
+
+// LLVM: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 8)
+// LLVM: store { float, float } { float 0x3FF19999A0000000, float 0x40019999A0000000 }, ptr %[[EXCEPTION_ADDR]], align 16
+// LLVM: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTICf, ptr null)
+
+// OGCG: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 8)
+// OGCG: %[[EXCEPTION_REAL:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[EXCEPTION_ADDR]], i32 0, i32 0
+// OGCG: %[[EXCEPTION_IMAG:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[EXCEPTION_ADDR]], i32 0, i32 1
+// OGCG: store float 0x3FF19999A0000000, ptr %[[EXCEPTION_REAL]], align 16
+// OGCG: store float 0x40019999A0000000, ptr %[[EXCEPTION_IMAG]], align 4
+// OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTICf, ptr null)
diff --git a/clang/test/CIR/CodeGen/vla.c b/clang/test/CIR/CodeGen/vla.c
new file mode 100644
index 0000000..e2adf45
--- /dev/null
+++ b/clang/test/CIR/CodeGen/vla.c
@@ -0,0 +1,285 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void f0(int len) {
+  int arr[len];
+}
+
+// CIR: cir.func{{.*}} @f0(%[[LEN_ARG:.*]]: !s32i {{.*}})
+// CIR:   %[[LEN_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["len", init]
+// CIR:   %[[SAVED_STACK:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR:   cir.store{{.*}} %[[LEN_ARG]], %[[LEN_ADDR]]
+// CIR:   %[[LEN:.*]] = cir.load{{.*}} %[[LEN_ADDR]]
+// CIR:   %[[LEN_SIZE_T:.*]] = cir.cast integral %[[LEN]] : !s32i -> !u64i
+// CIR:   %[[STACK_PTR:.*]] = cir.stacksave
+// CIR:   cir.store{{.*}} %[[STACK_PTR]], %[[SAVED_STACK]]
+// CIR:   %[[ARR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[LEN_SIZE_T]] : !u64i, ["arr"]
+// CIR:   %[[STACK_RESTORE_PTR:.*]] = cir.load{{.*}} %[[SAVED_STACK]]
+// CIR:   cir.stackrestore %[[STACK_RESTORE_PTR]]
+
+// LLVM: define{{.*}} void @f0(i32 %[[LEN_ARG:.*]]) {
+// LLVM:   %[[LEN_ADDR:.*]] = alloca i32
+// LLVM:   %[[SAVED_STACK:.*]] = alloca ptr
+// LLVM:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN_SIZE_T:.*]] = sext i32 %[[LEN]] to i64
+// LLVM:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// LLVM:   %[[ARR:.*]] = alloca i32, i64 %[[LEN_SIZE_T]]
+// LLVM:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// LLVM:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+// Note: VLA_EXPR0 below is emitted to capture debug info.
+
+// OGCG: define{{.*}} void @f0(i32 {{.*}} %[[LEN_ARG:.*]])
+// OGCG:   %[[LEN_ADDR:.*]] = alloca i32
+// OGCG:   %[[SAVED_STACK:.*]] = alloca ptr
+// OGCG:   %[[VLA_EXPR0:.*]] = alloca i64
+// OGCG:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN_SIZE_T:.*]] = zext i32 %[[LEN]] to i64
+// OGCG:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// OGCG:   %[[ARR:.*]] = alloca i32, i64 %[[LEN_SIZE_T]]
+// OGCG:   store i64 %[[LEN_SIZE_T]], ptr %[[VLA_EXPR0]]
+// OGCG:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// OGCG:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+void f1(int len) {
+  int arr[16][len];
+}
+
+// CIR: cir.func{{.*}} @f1(%[[LEN_ARG:.*]]: !s32i {{.*}})
+// CIR:   %[[LEN_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["len", init]
+// CIR:   %[[SAVED_STACK:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR:   cir.store{{.*}} %[[LEN_ARG]], %[[LEN_ADDR]]
+// CIR:   %[[SIXTEEN:.*]] = cir.const #cir.int<16> : !s32i
+// CIR:   %[[SIXTEEN_SIZE_T:.*]] = cir.cast integral %[[SIXTEEN]] : !s32i -> !u64i
+// CIR:   %[[LEN:.*]] = cir.load{{.*}} %[[LEN_ADDR]]
+// CIR:   %[[LEN_SIZE_T:.*]] = cir.cast integral %[[LEN]] : !s32i -> !u64i
+// CIR:   %[[STACK_PTR:.*]] = cir.stacksave
+// CIR:   cir.store{{.*}} %[[STACK_PTR]], %[[SAVED_STACK]]
+// CIR:   %[[TOTAL_LEN:.*]] = cir.binop(mul, %[[SIXTEEN_SIZE_T]], %[[LEN_SIZE_T]]) nuw
+// CIR:   %[[ARR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[TOTAL_LEN]] : !u64i, ["arr"]
+// CIR:   %[[STACK_RESTORE_PTR:.*]] = cir.load{{.*}} %[[SAVED_STACK]]
+// CIR:   cir.stackrestore %[[STACK_RESTORE_PTR]]
+
+// LLVM: define{{.*}} void @f1(i32 %[[LEN_ARG:.*]]) {
+// LLVM:   %[[LEN_ADDR:.*]] = alloca i32
+// LLVM:   %[[SAVED_STACK:.*]] = alloca ptr
+// LLVM:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN_SIZE_T:.*]] = sext i32 %[[LEN]] to i64
+// LLVM:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// LLVM:   %[[TOTAL_LEN:.*]] = mul nuw i64 16, %[[LEN_SIZE_T]]
+// LLVM:   %[[ARR:.*]] = alloca i32, i64 %[[TOTAL_LEN]]
+// LLVM:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// LLVM:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+// Note: VLA_EXPR0 below is emitted to capture debug info.
+
+// OGCG: define{{.*}} void @f1(i32 {{.*}} %[[LEN_ARG:.*]])
+// OGCG:   %[[LEN_ADDR:.*]] = alloca i32
+// OGCG:   %[[SAVED_STACK:.*]] = alloca ptr
+// OGCG:   %[[VLA_EXPR0:.*]] = alloca i64
+// OGCG:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN_SIZE_T:.*]] = zext i32 %[[LEN]] to i64
+// OGCG:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// OGCG:   %[[TOTAL_LEN:.*]] = mul nuw i64 16, %[[LEN_SIZE_T]]
+// OGCG:   %[[ARR:.*]] = alloca i32, i64 %[[TOTAL_LEN]]
+// OGCG:   store i64 %[[LEN_SIZE_T]], ptr %[[VLA_EXPR0]]
+// OGCG:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// OGCG:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+void f2(int len) {
+  int arr[len + 4];
+}
+  
+// CIR: cir.func{{.*}} @f2(%[[LEN_ARG:.*]]: !s32i {{.*}})
+// CIR:   %[[LEN_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["len", init]
+// CIR:   %[[SAVED_STACK:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR:   cir.store{{.*}} %[[LEN_ARG]], %[[LEN_ADDR]]
+// CIR:   %[[LEN:.*]] = cir.load{{.*}} %[[LEN_ADDR]]
+// CIR:   %[[FOUR:.*]] = cir.const #cir.int<4> : !s32i
+// CIR:   %[[TOTAL_LEN:.*]] = cir.binop(add, %[[LEN]], %[[FOUR]]) nsw : !s32i
+// CIR:   %[[TOTAL_LEN_SIZE_T:.*]] = cir.cast integral %[[TOTAL_LEN]] : !s32i -> !u64i
+// CIR:   %[[STACK_PTR:.*]] = cir.stacksave
+// CIR:   cir.store{{.*}} %[[STACK_PTR]], %[[SAVED_STACK]]
+// CIR:   %[[ARR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %[[TOTAL_LEN_SIZE_T]] : !u64i, ["arr"]
+// CIR:   %[[STACK_RESTORE_PTR:.*]] = cir.load{{.*}} %[[SAVED_STACK]]
+// CIR:   cir.stackrestore %[[STACK_RESTORE_PTR]]
+  
+// LLVM: define{{.*}} void @f2(i32 %[[LEN_ARG:.*]]) {
+// LLVM:   %[[LEN_ADDR:.*]] = alloca i32
+// LLVM:   %[[SAVED_STACK:.*]] = alloca ptr
+// LLVM:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// LLVM:   %[[TOTAL_LEN:.*]] = add nsw i32 %[[LEN]], 4
+// LLVM:   %[[TOTAL_LEN_SIZE_T:.*]] = sext i32 %[[TOTAL_LEN]] to i64
+// LLVM:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// LLVM:   %[[ARR:.*]] = alloca i32, i64 %[[TOTAL_LEN_SIZE_T]]
+// LLVM:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// LLVM:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+  
+// Note: VLA_EXPR0 below is emitted to capture debug info.
+  
+// OGCG: define{{.*}} void @f2(i32 {{.*}} %[[LEN_ARG:.*]])
+// OGCG:   %[[LEN_ADDR:.*]] = alloca i32
+// OGCG:   %[[SAVED_STACK:.*]] = alloca ptr
+// OGCG:   %[[VLA_EXPR0:.*]] = alloca i64
+// OGCG:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// OGCG:   %[[TOTAL_LEN:.*]] = add nsw i32 %[[LEN]], 4
+// OGCG:   %[[TOTAL_LEN_SIZE_T:.*]] = zext i32 %[[TOTAL_LEN]] to i64
+// OGCG:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// OGCG:   %[[ARR:.*]] = alloca i32, i64 %[[TOTAL_LEN_SIZE_T]]
+// OGCG:   store i64 %[[TOTAL_LEN_SIZE_T]], ptr %[[VLA_EXPR0]]
+// OGCG:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// OGCG:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+void f3(unsigned len) {
+  char s1[len];
+  unsigned i = 0u;
+  while (++i < len) {
+    char s2[i];
+  }
+}
+
+// CIR: cir.func{{.*}} @f3(%[[LEN_ARG:.*]]: !u32i {{.*}})
+// CIR:   %[[LEN_ADDR:.*]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["len", init]
+// CIR:   %[[SAVED_STACK:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR:   cir.store{{.*}} %[[LEN_ARG]], %[[LEN_ADDR]]
+// CIR:   %[[LEN:.*]] = cir.load{{.*}} %[[LEN_ADDR]]
+// CIR:   %[[LEN_SIZE_T:.*]] = cir.cast integral %[[LEN]] : !u32i -> !u64i
+// CIR:   %[[STACK_PTR:.*]] = cir.stacksave
+// CIR:   cir.store{{.*}} %[[STACK_PTR]], %[[SAVED_STACK]]
+// CIR:   %[[S1:.*]] = cir.alloca !s8i, !cir.ptr<!s8i>, %[[LEN_SIZE_T]] : !u64i, ["s1"]
+// CIR:   %[[I:.*]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["i", init]
+// CIR:   %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i
+// CIR:   cir.store{{.*}} %[[ZERO]], %[[I]]
+// CIR:   cir.scope {
+// CIR:     cir.while {
+// CIR:     %[[CUR_I:.*]] = cir.load{{.*}} %[[I]]
+// CIR:     %[[NEXT:.*]] = cir.unary(inc, %[[CUR_I]])
+// CIR:     cir.store{{.*}} %[[NEXT]], %[[I]]
+// CIR:     %[[LEN2:.*]] = cir.load{{.*}} %[[LEN_ADDR]]
+// CIR:     %[[CMP:.*]] = cir.cmp(lt, %[[NEXT]], %[[LEN2]])
+// CIR:     cir.condition(%[[CMP]])
+// CIR:   } do {
+// CIR:       cir.scope {
+// CIR:         %[[SAVED_STACK2:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"]
+// CIR:         %[[I_LEN:.*]] = cir.load{{.*}} %[[I]]
+// CIR:         %[[I_LEN_SIZE_T2:.*]] = cir.cast integral %[[I_LEN]] : !u32i -> !u64i
+// CIR:         %[[STACK_PTR2:.*]] = cir.stacksave
+// CIR:         cir.store{{.*}} %[[STACK_PTR2]], %[[SAVED_STACK2]]
+// CIR:         %[[S2:.*]] = cir.alloca !s8i, !cir.ptr<!s8i>, %[[I_LEN_SIZE_T2]] : !u64i, ["s2"]
+// CIR:         %[[SAVED_RESTORE_PTR2:.*]] = cir.load{{.*}} %[[SAVED_STACK2]]
+// CIR:         cir.stackrestore %[[SAVED_RESTORE_PTR2]]
+// CIR:       }
+// CIR:       cir.yield
+// CIR:     }
+// CIR:   }
+// CIR:   %[[STACK_RESTORE_PTR:.*]] = cir.load{{.*}} %[[SAVED_STACK]]
+// CIR:   cir.stackrestore %[[STACK_RESTORE_PTR]]
+
+// LLVM: define{{.*}} void @f3(i32 %[[LEN_ARG:.*]]) {
+// LLVM:   %[[SAVED_STACK2:.*]] = alloca ptr
+// LLVM:   %[[LEN_ADDR:.*]] = alloca i32
+// LLVM:   %[[SAVED_STACK:.*]] = alloca ptr
+// LLVM:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// LLVM:   %[[LEN_SIZE_T:.*]] = zext i32 %[[LEN]] to i64
+// LLVM:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// LLVM:   %[[S1:.*]] = alloca i8, i64 %[[LEN_SIZE_T]]
+// LLVM:   %[[I:.*]] = alloca i32
+// LLVM:   store i32 0, ptr %[[I]]
+// LLVM:   br label %[[WHILE_START:.*]]
+// LLVM: [[WHILE_START]]:
+// LLVM:   br label %[[WHILE_COND:.*]]
+// LLVM: [[WHILE_COND]]:
+// LLVM:   %[[CUR_I:.*]] = load i32, ptr %[[I]]
+// LLVM:   %[[NEXT:.*]] = add i32 %[[CUR_I]], 1
+// LLVM:   store i32 %[[NEXT]], ptr %[[I]]
+// LLVM:   %[[LEN2:.*]] = load i32, ptr %[[LEN_ADDR]]
+// LLVM:   %[[CMP:.*]] = icmp ult i32 %[[NEXT]], %[[LEN2]]
+// LLVM:   br i1 %[[CMP]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]]
+// LLVM: [[WHILE_BODY]]:
+// LLVM:   br label %[[WHILE_BODY2:.*]]
+// LLVM: [[WHILE_BODY2]]:
+// LLVM:   %[[I_LEN:.*]] = load i32, ptr %[[I]]
+// LLVM:   %[[I_LEN_SIZE_T2:.*]] = zext i32 %[[I_LEN]] to i64
+// LLVM:   %[[STACK_PTR2:.*]] = call ptr @llvm.stacksave.p0()
+// LLVM:   store ptr %[[STACK_PTR2]], ptr %[[SAVED_STACK2]]
+// LLVM:   %[[S2:.*]] = alloca i8, i64 %[[I_LEN_SIZE_T2]]
+// LLVM:   %[[STACK_RESTORE_PTR2:.*]] = load ptr, ptr %[[SAVED_STACK2]]
+// LLVM:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR2]])
+// LLVM:   br label %[[WHILE_BODY_END:.*]]
+// LLVM: [[WHILE_BODY_END]]:
+// LLVM:   br label %[[WHILE_COND]]
+// LLVM: [[WHILE_END]]:
+// LLVM:   br label %[[F3_END:.*]]
+// LLVM: [[F3_END]]:
+// LLVM:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// LLVM:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+// Note: VLA_EXPR0 and VLA_EXPR1 below are emitted to capture debug info.
+
+// OGCG: define{{.*}} void @f3(i32 {{.*}} %[[LEN_ARG:.*]])
+// OGCG:   %[[LEN_ADDR:.*]] = alloca i32
+// OGCG:   %[[SAVED_STACK:.*]] = alloca ptr
+// OGCG:   %[[VLA_EXPR0:.*]] = alloca i64
+// OGCG:   %[[I:.*]] = alloca i32
+// OGCG:   %[[SAVED_STACK1:.*]] = alloca ptr
+// OGCG:   %[[VLA_EXPR1:.*]] = alloca i64
+// OGCG:   store i32 %[[LEN_ARG]], ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN:.*]] = load i32, ptr %[[LEN_ADDR]]
+// OGCG:   %[[LEN_SIZE_T:.*]] = zext i32 %[[LEN]] to i64
+// OGCG:   %[[STACK_PTR:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG:   store ptr %[[STACK_PTR]], ptr %[[SAVED_STACK]]
+// OGCG:   %[[S1:.*]] = alloca i8, i64 %[[LEN_SIZE_T]]
+// OGCG:   store i64 %[[LEN_SIZE_T]], ptr %[[VLA_EXPR0]]
+// OGCG:   br label %[[WHILE_COND:.*]]
+// OGCG: [[WHILE_COND]]:
+// OGCG:   %[[CUR_I:.*]] = load i32, ptr %[[I]]
+// OGCG:   %[[NEXT:.*]] = add i32 %[[CUR_I]], 1
+// OGCG:   store i32 %[[NEXT]], ptr %[[I]]
+// OGCG:   %[[LEN2:.*]] = load i32, ptr %[[LEN_ADDR]]
+// OGCG:   %[[CMP:.*]] = icmp ult i32 %[[NEXT]], %[[LEN2]]
+// OGCG:   br i1 %[[CMP]], label %[[WHILE_BODY:.*]], label %[[WHILE_END:.*]]
+// OGCG: [[WHILE_BODY]]:
+// OGCG:   %[[I_LEN:.*]] = load i32, ptr %[[I]]
+// OGCG:   %[[I_LEN_SIZE_T:.*]] = zext i32 %[[I_LEN]] to i64
+// OGCG:   %[[STACK_PTR1:.*]] = call ptr @llvm.stacksave.p0()
+// OGCG:   store ptr %[[STACK_PTR1]], ptr %[[SAVED_STACK1]]
+// OGCG:   %[[S2:.*]] = alloca i8, i64 %[[I_LEN_SIZE_T]]
+// OGCG:   store i64 %[[I_LEN_SIZE_T]], ptr %[[VLA_EXPR1]]
+// OGCG:   %[[STACK_RESTORE_PTR1:.*]] = load ptr, ptr %[[SAVED_STACK1]]
+// OGCG:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR1]])
+// OGCG:   br label %[[WHILE_COND]]
+// OGCG: [[WHILE_END]]:
+// OGCG:   %[[STACK_RESTORE_PTR:.*]] = load ptr, ptr %[[SAVED_STACK]]
+// OGCG:   call void @llvm.stackrestore.p0(ptr %[[STACK_RESTORE_PTR]])
+
+
+// The following test case is disabled because it runs into a bug (unrelated
+// to VLA) in the handling of cleanups in loops with break statements.
+//
+// void f4(unsigned len) {
+//     char s1[len];
+//     while (1) {
+//       char s2[len];
+//       if (1)
+//         break;
+//     }
+// }
+  
+\ No newline at end of file
diff --git a/clang/test/CXX/module/module.import/p6.cpp b/clang/test/CXX/module/module.import/p6.cpp
index cb2d799..9e378a5 100644
--- a/clang/test/CXX/module/module.import/p6.cpp
+++ b/clang/test/CXX/module/module.import/p6.cpp
@@ -3,6 +3,9 @@
 
 // RUN: %clang_cc1 -std=c++20 -x c++-header %t/bad-header-unit.h \
 // RUN:  -emit-header-unit -o %t/bad-header-unit.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -x c++-header %t/bad-header-unit-declspec.h \
+// RUN:  -emit-header-unit -o %t/bad-header-unit.pcm -verify \
+// RUN:  -fdeclspec
 
 //--- bad-header-unit.h
 
@@ -77,3 +80,13 @@ template <typename T> bool b() {
 }
 
 inline bool B = b<int>();
+
+__attribute__((weak)) int weak_fun_definition() { return 42; }
+
+__attribute__((weak)) int weak_var_definition = 42;
+
+//--- bad-header-unit-declspec.h
+
+/* The cases below should compile without diagnostics.  */
+
+__declspec(selectany) int selectany_var_definition = 42; // expected-no-diagnostics
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index dc64f96..b798618 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1130,6 +1130,8 @@ __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) {
   return _mm256_shuffle_epi8(a, b);
 }
 
+TEST_CONSTEXPR(match_v32qi(_mm256_shuffle_epi8((__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qs){0,33,2,35,4,37,6,-39,8,41,10,43,12,45,14,-47,16,49,18,51,20,53,22,-55,24,57,26,59,28,61,30,-63}), 0,1,2,3,4,5,6,0,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,0,24,25,26,27,28,29,30,0));
+
 __m256i test_mm256_shuffle_epi32(__m256i a) {
   // CHECK-LABEL: test_mm256_shuffle_epi32
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index af1c904..fddf17d 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1466,18 +1466,27 @@ __m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
   // CHECK: @llvm.x86.avx512.pshuf.b.512
   return _mm512_shuffle_epi8(__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v64qi(_mm512_shuffle_epi8((__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,-15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,-15,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,-79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,-95}), 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,0,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,0,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,0));
+
 __m512i test_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_shuffle_epi8
   // CHECK: @llvm.x86.avx512.pshuf.b.512
   // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_mask_shuffle_epi8(__W,__U,__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_shuffle_epi8((__m512i)(__v64qi){1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8}, 0xFFFFFFFF00000000, (__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qi){63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48));
+
 __m512i test_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_shuffle_epi8
   // CHECK: @llvm.x86.avx512.pshuf.b.512
   // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
   return _mm512_maskz_shuffle_epi8(__U,__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_shuffle_epi8(0x8888888888888888,(__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qi){127,126,125,124,123,122,121,120,119,118,117,116,115,114,113,112,111,110,109,108,107,106,105,104,103,102,101,100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65,64}), 0,0,0,12,0,0,0,8,0,0,0,4,0,0,0,0,0,0,0,28,0,0,0,24,0,0,0,20,0,0,0,16,0,0,0,44,0,0,0,40,0,0,0,36,0,0,0,32,0,0,0,60,0,0,0,56,0,0,0,52,0,0,0,48));
+
 __m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_subs_epi8
   // CHECK: @llvm.ssub.sat.v64i8
diff --git a/clang/test/CodeGen/X86/avx512cd-builtins.c b/clang/test/CodeGen/X86/avx512cd-builtins.c
index b9d42b7..2890889 100644
--- a/clang/test/CodeGen/X86/avx512cd-builtins.c
+++ b/clang/test/CodeGen/X86/avx512cd-builtins.c
@@ -125,6 +125,8 @@ __m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) {
   // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 7
   return _mm512_broadcastmb_epi64(_mm512_cmpeq_epu64_mask ( a, b)); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_broadcastmb_epi64((__mmask8)(0)), 0,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8di(_mm512_broadcastmb_epi64((__mmask8)(0xab)), 0xab,0xab,0xab,0xab, 0xab,0xab,0xab,0xab));
 
 __m512i test_mm512_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: test_mm512_broadcastmw_epi32
@@ -148,3 +150,5 @@ __m512i test_mm512_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
   return _mm512_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_broadcastmw_epi32((__mmask16)(0xff)), 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff));
+TEST_CONSTEXPR(match_v16si(_mm512_broadcastmw_epi32((__mmask16)(0x0FA1L)), 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L, 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L, 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L, 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L));
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index c0e46de..d569283 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -1688,24 +1688,37 @@ __m128i test_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m12
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_shuffle_epi8(__W,__U,__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v16qi(_mm_mask_shuffle_epi8((__m128i)(__v16qi){1,1,1,1,1,1,1,1,2,2,4,4,6,6,8,8}, 0x00FF, (__m128i)(__v16qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, (__m128i)(__v16qi){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 15,14,13,12,11,10,9,8,2,2,4,4,6,6,8,8));
+
 __m128i test_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_shuffle_epi8
   // CHECK: @llvm.x86.ssse3.pshuf.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_shuffle_epi8(__U,__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_shuffle_epi8(0xAAAA, (__m128i)(__v16qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, (__m128i)(__v16qi){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 0,14,0,12,0,10,0,8,0,6,0,4,0,2,0,0));
+
 __m256i test_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_shuffle_epi8
   // CHECK: @llvm.x86.avx2.pshuf.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_shuffle_epi8(__W,__U,__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_shuffle_epi8((__m256i)(__v32qi){1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4}, 0x80808080, (__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qi){31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 1,1,1,1,1,1,1,8,2,2,2,2,2,2,2,0,3,3,3,3,3,3,3,24,4,4,4,4,4,4,4,16));
+                
+
 __m256i test_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_shuffle_epi8
   // CHECK: @llvm.x86.avx2.pshuf.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_shuffle_epi8(__U,__A,__B); 
 }
+
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_shuffle_epi8(0x0000FFFF, (__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qi){31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
+
 __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_subs_epi8
   // CHECK: @llvm.ssub.sat.v16i8
diff --git a/clang/test/CodeGen/X86/avx512vlcd-builtins.c b/clang/test/CodeGen/X86/avx512vlcd-builtins.c
index 1619305..56c04a0 100644
--- a/clang/test/CodeGen/X86/avx512vlcd-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlcd-builtins.c
@@ -20,6 +20,7 @@ __m128i test_mm_broadcastmb_epi64(__m128i a,__m128i b) {
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
   return _mm_broadcastmb_epi64(_mm_cmpeq_epi32_mask (a, b)); 
 }
+TEST_CONSTEXPR(match_v2du(_mm_broadcastmb_epi64((__mmask8)(76)), 76, 76));
 
 __m256i test_mm256_broadcastmb_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_broadcastmb_epi64
@@ -32,6 +33,7 @@ __m256i test_mm256_broadcastmb_epi64(__m256i a, __m256i b) {
   // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3
   return _mm256_broadcastmb_epi64(_mm256_cmpeq_epi64_mask ( a, b)); 
 }
+TEST_CONSTEXPR(match_v4di(_mm256_broadcastmb_epi64((__mmask8)(67)), 67, 67, 67, 67));
 
 __m128i test_mm_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: test_mm_broadcastmw_epi32
@@ -43,6 +45,7 @@ __m128i test_mm_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
   return _mm_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b));
 }
+TEST_CONSTEXPR(match_v4su(_mm_broadcastmw_epi32((__mmask16)(0xbabe)), 0xbabe, 0xbabe, 0xbabe, 0xbabe));
 
 __m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK-LABEL: test_mm256_broadcastmw_epi32
@@ -58,6 +61,7 @@ __m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) {
   // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7
   return _mm256_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); 
 }
+TEST_CONSTEXPR(match_v8si(_mm256_broadcastmw_epi32((__mmask16)(0xcafe)), 0xcafe,0xcafe,0xcafe,0xcafe, 0xcafe,0xcafe,0xcafe,0xcafe));
 
 __m128i test_mm_conflict_epi64(__m128i __A) {
   // CHECK-LABEL: test_mm_conflict_epi64
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index a1e05a1..d9041d4 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -589,6 +589,8 @@ __m64 test_mm_shuffle_pi8(__m64 a, __m64 b) {
   return _mm_shuffle_pi8(a, b);
 }
 
+TEST_CONSTEXPR(match_v8qi(_mm_shuffle_pi8((__m64)(__v8qi){0,1,2,3,4,5,6,7}, (__m64)(__v8qi){10,20,30,40,50,60,70,80}), 2,4,6,0,2,4,6,0));
+
 __m64 test_mm_shuffle_pi16(__m64 a) {
   // CHECK-LABEL: test_mm_shuffle_pi16
   // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index e623599..32abd9d 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -117,6 +117,8 @@ __m128i test_mm_shuffle_epi8(__m128i a, __m128i b) {
   return _mm_shuffle_epi8(a, b);
 }
 
+TEST_CONSTEXPR(match_v16qi(_mm_shuffle_epi8((__m128i)(__v16qs){0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15}, (__m128i)(__v16qs){15,-14,13,-12,11,-10,9,-8,7,-6,5,-4,3,-2,1,0}), -15,0,-13,0,-11,0,-9,0,-7,0,-5,0,-3,0,-1,0));
+
 __m128i test_mm_sign_epi8(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_sign_epi8
   // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c
index 07f47d9..607e3e4 100644
--- a/clang/test/CodeGen/attr-target-mv.c
+++ b/clang/test/CodeGen/attr-target-mv.c
@@ -30,6 +30,7 @@ int __attribute__((target("arch=gracemont"))) foo(void) {return 24;}
 int __attribute__((target("arch=pantherlake"))) foo(void) {return 25;}
 int __attribute__((target("arch=clearwaterforest"))) foo(void) {return 26;}
 int __attribute__((target("arch=diamondrapids"))) foo(void) {return 27;}
+int __attribute__((target("arch=wildcatlake"))) foo(void) {return 28;}
 int __attribute__((target("default"))) foo(void) { return 2; }
 
 int bar(void) {
@@ -203,6 +204,8 @@ void calls_pr50025c(void) { pr50025c(); }
 // ITANIUM: ret i32 26
 // ITANIUM: define{{.*}} i32 @foo.arch_diamondrapids()
 // ITANIUM: ret i32 27
+// ITANIUM: define{{.*}} i32 @foo.arch_wildcatlake()
+// ITANIUM: ret i32 28
 // ITANIUM: define{{.*}} i32 @foo()
 // ITANIUM: ret i32 2
 // ITANIUM: define{{.*}} i32 @bar()
@@ -262,6 +265,8 @@ void calls_pr50025c(void) { pr50025c(); }
 // WINDOWS: ret i32 26
 // WINDOWS: define dso_local i32 @foo.arch_diamondrapids()
 // WINDOWS: ret i32 27
+// WINDOWS: define dso_local i32 @foo.arch_wildcatlake()
+// WINDOWS: ret i32 28
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 120f1a5..2c0d83c 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -178,6 +178,7 @@ void verifycpustrings(void) {
   (void)__builtin_cpu_is("lunarlake");
   (void)__builtin_cpu_is("clearwaterforest");
   (void)__builtin_cpu_is("pantherlake");
+  (void)__builtin_cpu_is("wildcatlake");
   (void)__builtin_cpu_is("haswell");
   (void)__builtin_cpu_is("icelake-client");
   (void)__builtin_cpu_is("icelake-server");
diff --git a/clang/test/CodeGen/unified-lto-module-flag.ll b/clang/test/CodeGen/unified-lto-module-flag.ll
new file mode 100644
index 0000000..deefe82
--- /dev/null
+++ b/clang/test/CodeGen/unified-lto-module-flag.ll
@@ -0,0 +1,11 @@
+; Test that we do not duplicate the UnifiedLTO module flag.
+;
+; RUN: %clang_cc1 -emit-llvm -flto=full -funified-lto -o - %s | FileCheck %s
+
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!3 = !{i32 1, !"UnifiedLTO", i32 1}
diff --git a/clang/test/CodeGenHLSL/basic-target.c b/clang/test/CodeGenHLSL/basic-target.c
index c700e06..b9482df 100644
--- a/clang/test/CodeGenHLSL/basic-target.c
+++ b/clang/test/CodeGenHLSL/basic-target.c
@@ -6,5 +6,5 @@
 // RUN: %clang -cc1 -triple dxil-pc-shadermodel6.0-domain -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang -cc1 -triple dxil-pc-shadermodel6.0-geometry -emit-llvm -o - %s | FileCheck %s
 
-// CHECK: target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+// CHECK: target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64"
 // CHECK: target triple = "dxilv1.0-pc-shadermodel6.0-{{[a-z]+}}"
diff --git a/clang/test/CodeGenHLSL/vk-features/maximal_reconvergence.hlsl b/clang/test/CodeGenHLSL/vk-features/maximal_reconvergence.hlsl
new file mode 100644
index 0000000..f23ac7c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/vk-features/maximal_reconvergence.hlsl
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple spirv1.6-unknown-vulkan1.3-compute -fspv-enable-maximal-reconvergence -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple spirv1.6-unknown-vulkan1.3-compute -hlsl-entry test -fspv-enable-maximal-reconvergence -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK-ENTRY
+
+[numthreads(1,1,1)]
+void main() {
+// CHECK: define void @main() [[attributeNumber:#[0-9]+]] {
+}
+
+// CHECK: attributes [[attributeNumber]] = {{.*}} "enable-maximal-reconvergence"="true" {{.*}}
+
+
+[numthreads(1,1,1)]
+void test() {
+// CHECK-ENTRY: define void @test() [[attributeNumber:#[0-9]+]] {
+}
+    
+// CHECK-ENTRY: attributes [[attributeNumber]] = {{.*}} "enable-maximal-reconvergence"="true" {{.*}}
diff --git a/clang/test/Driver/gpu-libc-headers.c b/clang/test/Driver/gpu-libc-headers.c
deleted file mode 100644
index 1802919..0000000
--- a/clang/test/Driver/gpu-libc-headers.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a --sysroot=%S/Inputs/basic_gpu_tree \
-// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-AMDGPU
-// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 --sysroot=%S/Inputs/basic_gpu_tree \
-// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-NVPTX
-// CHECK-HEADERS-AMDGPU: "-cc1"{{.*}}"-isysroot"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}amdgcn-amd-amdhsa"
-// CHECK-HEADERS-NVPTX: "-cc1"{{.*}}"-isysroot"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}nvptx64-nvidia-cuda"
-
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
-// RUN:     -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
-// RUN:     -nostdinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
-// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
-// RUN:     -nobuiltininc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
-// CHECK-HEADERS-DISABLED-NOT: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}gpu-none-llvm"
diff --git a/clang/test/Driver/gpu-libc.c b/clang/test/Driver/gpu-libc.c
new file mode 100644
index 0000000..88f346f3
--- /dev/null
+++ b/clang/test/Driver/gpu-libc.c
@@ -0,0 +1,32 @@
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx90a --sysroot=%S/Inputs/basic_gpu_tree \
+// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-AMDGPU
+// RUN:   %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 --sysroot=%S/Inputs/basic_gpu_tree \
+// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-NVPTX
+// CHECK-HEADERS-AMDGPU: "-cc1"{{.*}}"-isysroot"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}amdgcn-amd-amdhsa"
+// CHECK-HEADERS-NVPTX: "-cc1"{{.*}}"-isysroot"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}nvptx64-nvidia-cuda"
+
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN:     -nogpuinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN:     -nostdinc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// RUN:   %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -nogpulib \
+// RUN:     -nobuiltininc %s 2>&1 | FileCheck %s --check-prefix=CHECK-HEADERS-DISABLED
+// CHECK-HEADERS-DISABLED-NOT: "-cc1"{{.*}}"-internal-isystem" "{{.*}}include{{.*}}gpu-none-llvm"
+
+
+// RUN:   %clang -### -fopenmp=libomp --target=x86_64-unknown-linux-gnu \
+// RUN:     --offload-arch=gfx908 --rocm-path=%S/Inputs/rocm --sysroot=%S/Inputs/basic_gpu_tree \
+// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin %s 2>&1 | FileCheck %s --check-prefix=OPENMP-AMDGPU
+// OPENMP-AMDGPU: clang-linker-wrapper{{.*}}"--device-linker=amdgcn-amd-amdhsa=-lc"
+// RUN:   %clang -### -fopenmp=libomp --target=x86_64-unknown-linux-gnu -foffload-lto \
+// RUN:     --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda --sysroot=%S/Inputs/basic_gpu_tree \
+// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin %s 2>&1 | FileCheck %s --check-prefix=OPENMP-NVPTX
+// OPENMP-NVPTX: clang-linker-wrapper{{.*}}"--device-linker=nvptx64-nvidia-cuda=-lc"
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu --offload-arch=gfx908 \
+// RUN:     --offload-new-driver --rocm-path=%S/Inputs/rocm --sysroot=%S/Inputs/basic_gpu_tree \
+// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin -x hip %s 2>&1 | FileCheck %s --check-prefix=HIP
+// HIP-NOT: "--device-linker=amdgcn-amd-amdhsa=-lc"
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fgpu-rdc --offload-arch=sm_52 \
+// RUN:     --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda --sysroot=%S/Inputs/basic_gpu_tree \
+// RUN:     -ccc-install-dir %S/Inputs/basic_gpu_tree/bin -x cuda %s 2>&1 | FileCheck %s --check-prefix=CUDA
+// CUDA-NOT: "--device-linker=nvptx64-nvidia-cuda=-lc"
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index 341f01c..24404ff 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -116,6 +116,10 @@
 // RUN:   | FileCheck %s -check-prefix=pantherlake
 // pantherlake: "-target-cpu" "pantherlake"
 //
+// RUN: %clang --target=x86_64 -c -### %s -march=wildcatlake 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=wildcatlake
+// wildcatlake: "-target-cpu" "wildcatlake"
+//
 // RUN: %clang --target=x86_64 -c -### %s -march=clearwaterforest 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=clearwaterforest
 // clearwaterforest: "-target-cpu" "clearwaterforest"
diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c
index f89cdc2..3906318 100644
--- a/clang/test/Misc/target-invalid-cpu-note/x86.c
+++ b/clang/test/Misc/target-invalid-cpu-note/x86.c
@@ -63,6 +63,7 @@
 // X86-SAME: {{^}}, lunarlake
 // X86-SAME: {{^}}, gracemont
 // X86-SAME: {{^}}, pantherlake
+// X86-SAME: {{^}}, wildcatlake
 // X86-SAME: {{^}}, sierraforest
 // X86-SAME: {{^}}, grandridge
 // X86-SAME: {{^}}, graniterapids
@@ -150,6 +151,7 @@
 // X86_64-SAME: {{^}}, lunarlake
 // X86_64-SAME: {{^}}, gracemont
 // X86_64-SAME: {{^}}, pantherlake
+// X86_64-SAME: {{^}}, wildcatlake
 // X86_64-SAME: {{^}}, sierraforest
 // X86_64-SAME: {{^}}, grandridge
 // X86_64-SAME: {{^}}, graniterapids
@@ -246,6 +248,7 @@
 // TUNE_X86-SAME: {{^}}, lunarlake
 // TUNE_X86-SAME: {{^}}, gracemont
 // TUNE_X86-SAME: {{^}}, pantherlake
+// TUNE_X86-SAME: {{^}}, wildcatlake
 // TUNE_X86-SAME: {{^}}, sierraforest
 // TUNE_X86-SAME: {{^}}, grandridge
 // TUNE_X86-SAME: {{^}}, graniterapids
@@ -349,6 +352,7 @@
 // TUNE_X86_64-SAME: {{^}}, lunarlake
 // TUNE_X86_64-SAME: {{^}}, gracemont
 // TUNE_X86_64-SAME: {{^}}, pantherlake
+// TUNE_X86_64-SAME: {{^}}, wildcatlake
 // TUNE_X86_64-SAME: {{^}}, sierraforest
 // TUNE_X86_64-SAME: {{^}}, grandridge
 // TUNE_X86_64-SAME: {{^}}, graniterapids
diff --git a/clang/test/Parser/DelayedTemplateParsing.cpp b/clang/test/Parser/DelayedTemplateParsing.cpp
index bcd286a..072c7ce 100644
--- a/clang/test/Parser/DelayedTemplateParsing.cpp
+++ b/clang/test/Parser/DelayedTemplateParsing.cpp
@@ -43,10 +43,10 @@ void undeclared()
 
 }
 
-template <class T> void foo5() {} //expected-note {{previous definition is here}} 
+template <class T> void foo5() {} //expected-note {{previous definition is here}}
 template <class T> void foo5() {} // expected-error {{redefinition of 'foo5'}}
 
-              
+
 
 
 namespace PR11931 {
@@ -195,3 +195,12 @@ template <typename> struct PR38460_2 {
   }
 };
 template struct PR38460_2<int>;
+
+namespace LateParsedAttrs {
+  template <class>
+  void f(int a) __attribute__((__diagnose_if__(a > 0, "foo", "error"))) {}
+  // expected-note@-1 {{from 'diagnose_if' attribute on 'f<int>'}}
+  void g() {
+    f<int>(1); // expected-error {{foo}}
+  }
+} // namespace LateParsedAttrs
diff --git a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
index 9c512a48..8ab53f6 100644
--- a/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
+++ b/clang/test/Preprocessor/embed___has_embed_parsing_errors.c
@@ -250,3 +250,35 @@
 
 #if __has_embed("") // expected-error {{empty filename}}
 #endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed (__FILE__  foo limit(1)
+#endif
+
+//--- test3.c
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed (__FILE__  foo
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed ("a" foo()
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed ("a" bar() foo
+#endif
+
+// expected-error@+3 {{missing ')' after '__has_embed'}} \
+   expected-error@+3 {{expected value in expression}} \
+   expected-note@+3 {{to match this '('}}
+#if __has_embed (__FILE__ limit(1) foo
+int a = __has_embed (__FILE__);
+#endif
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 44089c4..e2f4bcb 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -2526,6 +2526,9 @@
 // RUN: %clang -march=pantherlake -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32
+// RUN: %clang -march=wildcatlake -m32 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32
 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_CWF_M32,CHECK_NKL_M32
@@ -2630,6 +2633,9 @@
 // RUN: %clang -march=pantherlake -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64
+// RUN: %clang -march=wildcatlake -m64 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64
 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_CWF_M64,CHECK_NKL_M64
diff --git a/clang/test/Sema/attr-cpuspecific-cpus.c b/clang/test/Sema/attr-cpuspecific-cpus.c
index 48543ac..0874d0c 100644
--- a/clang/test/Sema/attr-cpuspecific-cpus.c
+++ b/clang/test/Sema/attr-cpuspecific-cpus.c
@@ -87,3 +87,4 @@ ATTR(cpu_specific(lunarlake)) void CPU37(void){}
 ATTR(cpu_specific(gracemont)) void CPU38(void){}
 ATTR(cpu_specific(pantherlake)) void CPU39(void){}
 ATTR(cpu_specific(clearwaterforest)) void CPU40(void){}
+ATTR(cpu_specific(wildcatlake)) void CPU41(void){}
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 901d510..9ef44d03 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -2066,7 +2066,28 @@ public:
     UserProvidedConstructor(const UserProvidedConstructor&)            = delete;
     UserProvidedConstructor& operator=(const UserProvidedConstructor&) = delete;
 };
+struct Ctr {
+  Ctr();
+};
+struct Ctr2 {
+  Ctr2();
+private:
+  NoEligibleTrivialContructor inner;
+};
+
+struct NonCopyable{
+    NonCopyable() = default;
+    NonCopyable(const NonCopyable&) = delete;
+};
+
+class C {
+    NonCopyable nc;
+};
 
+static_assert(__builtin_is_implicit_lifetime(Ctr));
+static_assert(!__builtin_is_implicit_lifetime(Ctr2));
+static_assert(__builtin_is_implicit_lifetime(C));
+static_assert(!__builtin_is_implicit_lifetime(NoEligibleTrivialContructor));
 static_assert(__builtin_is_implicit_lifetime(NonAggregate));
 static_assert(!__builtin_is_implicit_lifetime(DataMemberInitializer));
 static_assert(!__builtin_is_implicit_lifetime(UserProvidedConstructor));
@@ -2076,9 +2097,27 @@ template <typename T>
 class Tpl {
     Tpl() requires false = default ;
 };
-static_assert(!__builtin_is_implicit_lifetime(Tpl<int>));
+static_assert(__builtin_is_implicit_lifetime(Tpl<int>));
+
+template <typename>
+class MultipleDefaults {
+  MultipleDefaults() {};
+  MultipleDefaults() requires true = default;
+};
+static_assert(__builtin_is_implicit_lifetime(MultipleDefaults<int>));
+template <typename>
+class MultipleDefaults2 {
+  MultipleDefaults2() requires true {};
+  MultipleDefaults2() = default;
+};
+
+static_assert(__builtin_is_implicit_lifetime(MultipleDefaults2<int>));
+
 
 #endif
+
+
+
 }
 
 void is_signed()
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index ef79e45..50da2f8 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -417,7 +417,8 @@ getOutputStream(StringRef Path, DiagnosticsEngine &Diags, bool Binary) {
 }
 
 static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
-                                 DiagnosticsEngine &Diags) {
+                                 DiagnosticsEngine &Diags,
+                                 IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   // Get the target specific parser.
   std::string Error;
   const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error);
@@ -440,6 +441,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
   // Record the location of the include directories so that the lexer can find
   // it later.
   SrcMgr.setIncludeDirs(Opts.IncludePaths);
+  SrcMgr.setVirtualFileSystem(VFS);
 
   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(Opts.Triple));
   assert(MRI && "Unable to create target register info!");
@@ -632,8 +634,9 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
 }
 
 static bool ExecuteAssembler(AssemblerInvocation &Opts,
-                             DiagnosticsEngine &Diags) {
-  bool Failed = ExecuteAssemblerImpl(Opts, Diags);
+                             DiagnosticsEngine &Diags,
+                             IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
+  bool Failed = ExecuteAssemblerImpl(Opts, Diags, VFS);
 
   // Delete output file if there were errors.
   if (Failed) {
@@ -714,7 +717,7 @@ int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
   }
 
   // Execute the invocation, unless there were parsing errors.
-  bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags);
+  bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags, VFS);
 
   // If any timers were active but haven't been destroyed yet, print their
   // results now.
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index d18c45e..fc27fd2 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -1644,9 +1644,9 @@ bool CursorVisitor::VisitTagTypeLoc(TagTypeLoc TL) {
     return true;
 
   if (TL.isDefinition())
-    return Visit(MakeCXCursor(TL.getOriginalDecl(), TU, RegionOfInterest));
+    return Visit(MakeCXCursor(TL.getDecl(), TU, RegionOfInterest));
 
-  return Visit(MakeCursorTypeRef(TL.getOriginalDecl(), TL.getNameLoc(), TU));
+  return Visit(MakeCursorTypeRef(TL.getDecl(), TL.getNameLoc(), TU));
 }
 
 bool CursorVisitor::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
@@ -1852,7 +1852,7 @@ bool CursorVisitor::VisitPackIndexingTypeLoc(PackIndexingTypeLoc TL) {
 }
 
 bool CursorVisitor::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) {
-  return Visit(MakeCursorTypeRef(TL.getOriginalDecl(), TL.getNameLoc(), TU));
+  return Visit(MakeCursorTypeRef(TL.getDecl(), TL.getNameLoc(), TU));
 }
 
 bool CursorVisitor::VisitAtomicTypeLoc(AtomicTypeLoc TL) {
diff --git a/clang/tools/libclang/CIndexCodeCompletion.cpp b/clang/tools/libclang/CIndexCodeCompletion.cpp
index 6d14f28..81448b4 100644
--- a/clang/tools/libclang/CIndexCodeCompletion.cpp
+++ b/clang/tools/libclang/CIndexCodeCompletion.cpp
@@ -617,7 +617,7 @@ namespace {
       if (!baseType.isNull()) {
         // Get the declaration for a class/struct/union/enum type
         if (const TagType *Tag = baseType->getAs<TagType>())
-          D = Tag->getOriginalDecl();
+          D = Tag->getDecl();
         // Get the @interface declaration for a (possibly-qualified) Objective-C
         // object pointer type, e.g., NSString*
         else if (const ObjCObjectPointerType *ObjPtr = 
@@ -629,7 +629,7 @@ namespace {
         // Get the class for a C++ injected-class-name
         else if (const InjectedClassNameType *Injected =
                  baseType->getAs<InjectedClassNameType>())
-          D = Injected->getOriginalDecl();
+          D = Injected->getDecl();
       }
 
       if (D != nullptr) {
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 56f113c..0a43d73 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -1338,7 +1338,7 @@ CXCursor cxcursor::getTypeRefCursor(CXCursor cursor) {
   if (const TypedefType *Typedef = Ty->getAs<TypedefType>())
     return MakeCursorTypeRef(Typedef->getDecl(), Loc, TU);
   if (const TagType *Tag = Ty->getAs<TagType>())
-    return MakeCursorTypeRef(Tag->getOriginalDecl(), Loc, TU);
+    return MakeCursorTypeRef(Tag->getDecl(), Loc, TU);
   if (const TemplateTypeParmType *TemplP = Ty->getAs<TemplateTypeParmType>())
     return MakeCursorTypeRef(TemplP->getDecl(), Loc, TU);
 
diff --git a/clang/tools/libclang/CXIndexDataConsumer.cpp b/clang/tools/libclang/CXIndexDataConsumer.cpp
index 932201a..c97aefa 100644
--- a/clang/tools/libclang/CXIndexDataConsumer.cpp
+++ b/clang/tools/libclang/CXIndexDataConsumer.cpp
@@ -357,7 +357,7 @@ CXIndexDataConsumer::CXXBasesListInfo::CXXBasesListInfo(const CXXRecordDecl *D,
           TST = T->getAs<TemplateSpecializationType>()) {
       BaseD = TST->getTemplateName().getAsTemplateDecl();
     } else if (const RecordType *RT = T->getAs<RecordType>()) {
-      BaseD = RT->getOriginalDecl();
+      BaseD = RT->getDecl();
     }
 
     if (BaseD)
diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp
index d21ac7c..3feb563 100644
--- a/clang/tools/libclang/CXType.cpp
+++ b/clang/tools/libclang/CXType.cpp
@@ -546,11 +546,11 @@ try_again:
     break;
   case Type::Record:
   case Type::Enum:
-    D = cast<TagType>(TP)->getOriginalDecl();
+    D = cast<TagType>(TP)->getDecl();
     break;
   case Type::TemplateSpecialization:
     if (const RecordType *Record = TP->getAs<RecordType>())
-      D = Record->getOriginalDecl();
+      D = Record->getDecl();
     else
       D = cast<TemplateSpecializationType>(TP)->getTemplateName()
                                                          .getAsTemplateDecl();
@@ -564,7 +564,7 @@ try_again:
     break;
 
   case Type::InjectedClassName:
-    D = cast<InjectedClassNameType>(TP)->getOriginalDecl();
+    D = cast<InjectedClassNameType>(TP)->getDecl();
     break;
 
     // FIXME: Template type parameters!
@@ -1037,7 +1037,7 @@ static long long visitRecordForValidation(const RecordDecl *RD) {
       return CXTypeLayoutError_Dependent;
     // recurse
     if (const RecordType *ChildType = I->getType()->getAs<RecordType>()) {
-      if (const RecordDecl *Child = ChildType->getOriginalDecl()) {
+      if (const RecordDecl *Child = ChildType->getDecl()) {
         long long ret = visitRecordForValidation(Child);
         if (ret < 0)
           return ret;
diff --git a/clang/unittests/AST/ASTContextParentMapTest.cpp b/clang/unittests/AST/ASTContextParentMapTest.cpp
index 4a8aa48..545eaf3 100644
--- a/clang/unittests/AST/ASTContextParentMapTest.cpp
+++ b/clang/unittests/AST/ASTContextParentMapTest.cpp
@@ -132,8 +132,7 @@ TEST(GetParents, FriendTypeLoc) {
   TypeLoc FrALoc = FrA.getFriendType()->getTypeLoc();
   TypeLoc FrBLoc = FrB.getFriendType()->getTypeLoc();
   bool FrAOwnsTag = FrALoc.getTypePtr()->getAs<TagType>()->isTagOwned();
-  TagDecl *FrATagDecl =
-      FrALoc.getTypePtr()->getAs<TagType>()->getOriginalDecl();
+  TagDecl *FrATagDecl = FrALoc.getTypePtr()->getAs<TagType>()->getDecl();
   bool FrBOwnsTag = FrBLoc.getTypePtr()->getAs<TagType>()->isTagOwned();
 
   EXPECT_THAT(Ctx.getParents(A), ElementsAre(DynTypedNode::create(TU)));
diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp
index e7160bc..4c7ea5e 100644
--- a/clang/unittests/AST/ASTImporterTest.cpp
+++ b/clang/unittests/AST/ASTImporterTest.cpp
@@ -29,7 +29,7 @@ using internal::Matcher;
 
 static const RecordDecl *getRecordDeclOfFriend(FriendDecl *FD) {
   QualType Ty = FD->getFriendType()->getType().getCanonicalType();
-  return cast<RecordType>(Ty)->getOriginalDecl();
+  return cast<RecordType>(Ty)->getDecl();
 }
 
 struct ImportExpr : TestImportBase {};
@@ -4614,7 +4614,7 @@ TEST_P(ImportFriendClasses, ImportOfClassDefinitionAndFwdFriendShouldBeLinked) {
   auto *Friend = FirstDeclMatcher<FriendDecl>().match(FromTU0, friendDecl());
   QualType FT = Friend->getFriendType()->getType();
   FT = FromTU0->getASTContext().getCanonicalType(FT);
-  auto *Fwd = cast<TagType>(FT)->getOriginalDecl();
+  auto *Fwd = cast<TagType>(FT)->getDecl();
   auto *ImportedFwd = Import(Fwd, Lang_CXX03);
   Decl *FromTU1 = getTuDecl(
       R"(
diff --git a/clang/unittests/AST/StructuralEquivalenceTest.cpp b/clang/unittests/AST/StructuralEquivalenceTest.cpp
index bee288d..24e20c7 100644
--- a/clang/unittests/AST/StructuralEquivalenceTest.cpp
+++ b/clang/unittests/AST/StructuralEquivalenceTest.cpp
@@ -719,13 +719,11 @@ TEST_F(StructuralEquivalenceRecordTest, AnonymousRecordsShouldBeInequivalent) {
   auto *A = FirstDeclMatcher<IndirectFieldDecl>().match(
       TU, indirectFieldDecl(hasName("a")));
   auto *FA = cast<FieldDecl>(A->chain().front());
-  RecordDecl *RA =
-      cast<RecordType>(FA->getType().getTypePtr())->getOriginalDecl();
+  RecordDecl *RA = cast<RecordType>(FA->getType().getTypePtr())->getDecl();
   auto *B = FirstDeclMatcher<IndirectFieldDecl>().match(
       TU, indirectFieldDecl(hasName("b")));
   auto *FB = cast<FieldDecl>(B->chain().front());
-  RecordDecl *RB =
-      cast<RecordType>(FB->getType().getTypePtr())->getOriginalDecl();
+  RecordDecl *RB = cast<RecordType>(FB->getType().getTypePtr())->getDecl();
 
   ASSERT_NE(RA, RB);
   EXPECT_TRUE(testStructuralMatch(RA, RA));
@@ -754,15 +752,13 @@ TEST_F(StructuralEquivalenceRecordTest,
   auto *A = FirstDeclMatcher<IndirectFieldDecl>().match(
       TU, indirectFieldDecl(hasName("a")));
   auto *FA = cast<FieldDecl>(A->chain().front());
-  RecordDecl *RA =
-      cast<RecordType>(FA->getType().getTypePtr())->getOriginalDecl();
+  RecordDecl *RA = cast<RecordType>(FA->getType().getTypePtr())->getDecl();
 
   auto *TU1 = get<1>(t);
   auto *A1 = FirstDeclMatcher<IndirectFieldDecl>().match(
       TU1, indirectFieldDecl(hasName("a")));
   auto *FA1 = cast<FieldDecl>(A1->chain().front());
-  RecordDecl *RA1 =
-      cast<RecordType>(FA1->getType().getTypePtr())->getOriginalDecl();
+  RecordDecl *RA1 = cast<RecordType>(FA1->getType().getTypePtr())->getDecl();
 
   RecordDecl *X =
       FirstDeclMatcher<RecordDecl>().match(TU, recordDecl(hasName("X")));
diff --git a/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp b/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp
index c280921..d3dee58 100644
--- a/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/MockHeaders.cpp
@@ -27,6 +27,9 @@ using nullptr_t = decltype(nullptr);
 
 } // namespace std
 
+typedef decltype(sizeof(char)) size_t;
+typedef decltype(sizeof(char*)) ptrdiff_t;
+
 #endif // CSTDDEF_H
 )";
 
@@ -479,12 +482,38 @@ struct conjunction<T, Ts...>
 template <typename T>
 struct conjunction<T> : T {};
 
+template <typename... Ts>
+struct disjunction : std::false_type {};
+
+template <typename T, typename... Ts>
+struct disjunction<T, Ts...>
+    : std::conditional<T::value, T, disjunction<Ts...>>::type {};
+
+template <typename T>
+struct disjunction<T> : T {};
+
 template <typename T>
 struct negation : std::integral_constant<bool, !T::value> {};
 
 template <bool B, typename T = void>
 using enable_if_t = typename std::enable_if<B, T>::type;
 
+
+template <bool B, typename T, typename F>
+using conditional_t = typename std::conditional<B, T, F>::type;
+
+template <typename T>
+using remove_cv_t = typename std::remove_cv<T>::type;
+
+template <typename T>
+using remove_reference_t = typename std::remove_reference<T>::type;
+
+template <typename T>
+using decay_t = typename std::decay<T>::type;
+
+struct in_place_t {};
+
+constexpr in_place_t in_place;
 } // namespace absl
 
 #endif // ABSL_TYPE_TRAITS_H
@@ -499,8 +528,16 @@ namespace std {
 struct string {
   string(const char*);
   ~string();
+  const char *c_str() const;
+  bool empty();
+};
+
+struct string_view {
+  string_view(const char*);
+  ~string_view();
   bool empty();
 };
+
 bool operator!=(const string &LHS, const char *RHS);
 
 } // namespace std
@@ -792,9 +829,6 @@ struct nullopt_t {
 };
 constexpr nullopt_t nullopt;
 
-struct in_place_t {};
-constexpr in_place_t in_place;
-
 template <typename T>
 class optional;
 
@@ -1240,6 +1274,964 @@ constexpr bool operator!=(const T &value, const Optional<U> &opt);
 } // namespace base
 )";
 
+constexpr const char StatusDefsHeader[] =
+    R"cc(
+#ifndef STATUS_H_
+#define STATUS_H_
+
+#include "absl_type_traits.h"
+#include "std_initializer_list.h"
+#include "std_string.h"
+#include "std_type_traits.h"
+#include "std_utility.h"
+
+namespace absl {
+struct SourceLocation {
+  static constexpr SourceLocation current();
+  static constexpr SourceLocation
+  DoNotInvokeDirectlyNoSeriouslyDont(int line, const char *file_name);
+};
+} // namespace absl
+namespace absl {
+enum class StatusCode : int {
+  kOk,
+  kCancelled,
+  kUnknown,
+  kInvalidArgument,
+  kDeadlineExceeded,
+  kNotFound,
+  kAlreadyExists,
+  kPermissionDenied,
+  kResourceExhausted,
+  kFailedPrecondition,
+  kAborted,
+  kOutOfRange,
+  kUnimplemented,
+  kInternal,
+  kUnavailable,
+  kDataLoss,
+  kUnauthenticated,
+};
+} // namespace absl
+
+namespace absl {
+enum class StatusToStringMode : int {
+  kWithNoExtraData = 0,
+  kWithPayload = 1 << 0,
+  kWithSourceLocation = 1 << 1,
+  kWithEverything = ~kWithNoExtraData,
+  kDefault = kWithPayload,
+};
+class Status {
+public:
+  Status();
+  template <typename Enum> Status(Enum code, std::string_view msg);
+  Status(absl::StatusCode code, std::string_view msg,
+         absl::SourceLocation loc = SourceLocation::current());
+  Status(const Status &base_status, absl::SourceLocation loc);
+  Status(Status &&base_status, absl::SourceLocation loc);
+  ~Status() {}
+
+  Status(const Status &);
+  Status &operator=(const Status &x);
+
+  Status(Status &&) noexcept;
+  Status &operator=(Status &&);
+
+  friend bool operator==(const Status &, const Status &);
+  friend bool operator!=(const Status &, const Status &);
+
+  bool ok() const { return true; }
+  void CheckSuccess() const;
+  void IgnoreError() const;
+  int error_code() const;
+  absl::Status ToCanonical() const;
+  std::string
+  ToString(StatusToStringMode m = StatusToStringMode::kDefault) const;
+  void Update(const Status &new_status);
+  void Update(Status &&new_status);
+};
+
+bool operator==(const Status &lhs, const Status &rhs);
+bool operator!=(const Status &lhs, const Status &rhs);
+
+Status OkStatus();
+Status InvalidArgumentError(char *);
+
+#endif // STATUS_H
+)cc";
+
+constexpr const char StatusOrDefsHeader[] = R"cc(
+#ifndef STATUSOR_H_
+#define STATUSOR_H_
+#include "absl_type_traits.h"
+#include "status_defs.h"
+#include "std_initializer_list.h"
+#include "std_type_traits.h"
+#include "std_utility.h"
+
+template <typename T> struct StatusOr;
+
+namespace internal_statusor {
+
+template <typename T, typename U, typename = void>
+struct HasConversionOperatorToStatusOr : std::false_type {};
+
+template <typename T, typename U>
+void test(char (*)[sizeof(std::declval<U>().operator absl::StatusOr<T>())]);
+
+template <typename T, typename U>
+struct HasConversionOperatorToStatusOr<T, U, decltype(test<T, U>(0))>
+    : std::true_type {};
+
+template <typename T, typename U>
+using IsConstructibleOrConvertibleFromStatusOr =
+    absl::disjunction<std::is_constructible<T, StatusOr<U> &>,
+                      std::is_constructible<T, const StatusOr<U> &>,
+                      std::is_constructible<T, StatusOr<U> &&>,
+                      std::is_constructible<T, const StatusOr<U> &&>,
+                      std::is_convertible<StatusOr<U> &, T>,
+                      std::is_convertible<const StatusOr<U> &, T>,
+                      std::is_convertible<StatusOr<U> &&, T>,
+                      std::is_convertible<const StatusOr<U> &&, T>>;
+
+template <typename T, typename U>
+using IsConstructibleOrConvertibleOrAssignableFromStatusOr =
+    absl::disjunction<IsConstructibleOrConvertibleFromStatusOr<T, U>,
+                      std::is_assignable<T &, StatusOr<U> &>,
+                      std::is_assignable<T &, const StatusOr<U> &>,
+                      std::is_assignable<T &, StatusOr<U> &&>,
+                      std::is_assignable<T &, const StatusOr<U> &&>>;
+
+template <typename T, typename U>
+struct IsDirectInitializationAmbiguous
+    : public absl::conditional_t<
+          std::is_same<absl::remove_cv_t<absl::remove_reference_t<U>>,
+                       U>::value,
+          std::false_type,
+          IsDirectInitializationAmbiguous<
+              T, absl::remove_cv_t<absl::remove_reference_t<U>>>> {};
+
+template <typename T, typename V>
+struct IsDirectInitializationAmbiguous<T, absl::StatusOr<V>>
+    : public IsConstructibleOrConvertibleFromStatusOr<T, V> {};
+
+template <typename T, typename U>
+using IsDirectInitializationValid = absl::disjunction<
+    // Short circuits if T is basically U.
+    std::is_same<T, absl::remove_cv_t<absl::remove_reference_t<U>>>,
+    absl::negation<absl::disjunction<
+        std::is_same<absl::StatusOr<T>,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        std::is_same<absl::Status,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        std::is_same<absl::in_place_t,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        IsDirectInitializationAmbiguous<T, U>>>>;
+
+template <typename T, typename U>
+struct IsForwardingAssignmentAmbiguous
+    : public absl::conditional_t<
+          std::is_same<absl::remove_cv_t<absl::remove_reference_t<U>>,
+                       U>::value,
+          std::false_type,
+          IsForwardingAssignmentAmbiguous<
+              T, absl::remove_cv_t<absl::remove_reference_t<U>>>> {};
+
+template <typename T, typename U>
+struct IsForwardingAssignmentAmbiguous<T, absl::StatusOr<U>>
+    : public IsConstructibleOrConvertibleOrAssignableFromStatusOr<T, U> {};
+
+template <typename T, typename U>
+using IsForwardingAssignmentValid = absl::disjunction<
+    // Short circuits if T is basically U.
+    std::is_same<T, absl::remove_cv_t<absl::remove_reference_t<U>>>,
+    absl::negation<absl::disjunction<
+        std::is_same<absl::StatusOr<T>,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        std::is_same<absl::Status,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        std::is_same<absl::in_place_t,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        IsForwardingAssignmentAmbiguous<T, U>>>>;
+
+template <typename T, typename U>
+using IsForwardingAssignmentValid = absl::disjunction<
+    // Short circuits if T is basically U.
+    std::is_same<T, absl::remove_cv_t<absl::remove_reference_t<U>>>,
+    absl::negation<absl::disjunction<
+        std::is_same<absl::StatusOr<T>,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        std::is_same<absl::Status,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        std::is_same<absl::in_place_t,
+                     absl::remove_cv_t<absl::remove_reference_t<U>>>,
+        IsForwardingAssignmentAmbiguous<T, U>>>>;
+
+template <typename T> struct OperatorBase {
+  const T &value() const &;
+  T &value() &;
+  const T &&value() const &&;
+  T &&value() &&;
+
+  const T &operator*() const &;
+  T &operator*() &;
+  const T &&operator*() const &&;
+  T &&operator*() &&;
+
+  // To test that analyses are okay if there is a use of operator*
+  // within this base class.
+  const T *operator->() const { return __builtin_addressof(**this); }
+  T *operator->() { return __builtin_addressof(**this); }
+};
+
+} // namespace internal_statusor
+
+template <typename T>
+struct StatusOr : private internal_statusor::OperatorBase<T> {
+  explicit StatusOr();
+
+  StatusOr(const StatusOr &) = default;
+  StatusOr &operator=(const StatusOr &) = default;
+
+  StatusOr(StatusOr &&) = default;
+  StatusOr &operator=(StatusOr &&) = default;
+
+  template <
+      typename U,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_same<T, U>>,
+              std::is_constructible<T, const U &>,
+              std::is_convertible<const U &, T>,
+              absl::negation<
+                  internal_statusor::IsConstructibleOrConvertibleFromStatusOr<
+                      T, U>>>::value,
+          int> = 0>
+  StatusOr(const StatusOr<U> &);
+
+  template <
+      typename U,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_same<T, U>>,
+              std::is_constructible<T, const U &>,
+              absl::negation<std::is_convertible<const U &, T>>,
+              absl::negation<
+                  internal_statusor::IsConstructibleOrConvertibleFromStatusOr<
+                      T, U>>>::value,
+          int> = 0>
+  explicit StatusOr(const StatusOr<U> &);
+
+  template <
+      typename U,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_same<T, U>>,
+              std::is_constructible<T, U &&>, std::is_convertible<U &&, T>,
+              absl::negation<
+                  internal_statusor::IsConstructibleOrConvertibleFromStatusOr<
+                      T, U>>>::value,
+          int> = 0>
+  StatusOr(StatusOr<U> &&);
+
+  template <
+      typename U,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_same<T, U>>,
+              std::is_constructible<T, U &&>,
+              absl::negation<std::is_convertible<U &&, T>>,
+              absl::negation<
+                  internal_statusor::IsConstructibleOrConvertibleFromStatusOr<
+                      T, U>>>::value,
+          int> = 0>
+  explicit StatusOr(StatusOr<U> &&);
+
+  template <
+      typename U,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_same<T, U>>,
+              std::is_constructible<T, const U &>,
+              std::is_assignable<T, const U &>,
+              absl::negation<
+                  internal_statusor::
+                      IsConstructibleOrConvertibleOrAssignableFromStatusOr<
+                          T, U>>>::value,
+          int> = 0>
+  StatusOr &operator=(const StatusOr<U> &);
+
+  template <
+      typename U,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_same<T, U>>,
+              std::is_constructible<T, U &&>, std::is_assignable<T, U &&>,
+              absl::negation<
+                  internal_statusor::
+                      IsConstructibleOrConvertibleOrAssignableFromStatusOr<
+                          T, U>>>::value,
+          int> = 0>
+  StatusOr &operator=(StatusOr<U> &&);
+
+  template <
+      typename U = absl::Status,
+      absl::enable_if_t<
+          absl::conjunction<
+              std::is_convertible<U &&, absl::Status>,
+              std::is_constructible<absl::Status, U &&>,
+              absl::negation<std::is_same<absl::decay_t<U>, absl::StatusOr<T>>>,
+              absl::negation<std::is_same<absl::decay_t<U>, T>>,
+              absl::negation<std::is_same<absl::decay_t<U>, absl::in_place_t>>,
+              absl::negation<internal_statusor::HasConversionOperatorToStatusOr<
+                  T, U &&>>>::value,
+          int> = 0>
+  StatusOr(U &&);
+
+  template <
+      typename U = absl::Status,
+      absl::enable_if_t<
+          absl::conjunction<
+              absl::negation<std::is_convertible<U &&, absl::Status>>,
+              std::is_constructible<absl::Status, U &&>,
+              absl::negation<std::is_same<absl::decay_t<U>, absl::StatusOr<T>>>,
+              absl::negation<std::is_same<absl::decay_t<U>, T>>,
+              absl::negation<std::is_same<absl::decay_t<U>, absl::in_place_t>>,
+              absl::negation<internal_statusor::HasConversionOperatorToStatusOr<
+                  T, U &&>>>::value,
+          int> = 0>
+  explicit StatusOr(U &&);
+
+  template <
+      typename U = absl::Status,
+      absl::enable_if_t<
+          absl::conjunction<
+              std::is_convertible<U &&, absl::Status>,
+              std::is_constructible<absl::Status, U &&>,
+              absl::negation<std::is_same<absl::decay_t<U>, absl::StatusOr<T>>>,
+              absl::negation<std::is_same<absl::decay_t<U>, T>>,
+              absl::negation<std::is_same<absl::decay_t<U>, absl::in_place_t>>,
+              absl::negation<internal_statusor::HasConversionOperatorToStatusOr<
+                  T, U &&>>>::value,
+          int> = 0>
+  StatusOr &operator=(U &&);
+
+  template <
+      typename U = T,
+      typename = typename std::enable_if<absl::conjunction<
+          std::is_constructible<T, U &&>, std::is_assignable<T &, U &&>,
+          absl::disjunction<
+              std::is_same<absl::remove_cv_t<absl::remove_reference_t<U>>, T>,
+              absl::conjunction<
+                  absl::negation<std::is_convertible<U &&, absl::Status>>,
+                  absl::negation<
+                      internal_statusor::HasConversionOperatorToStatusOr<
+                          T, U &&>>>>,
+          internal_statusor::IsForwardingAssignmentValid<T, U &&>>::value>::
+          type>
+  StatusOr &operator=(U &&);
+
+  template <typename... Args> explicit StatusOr(absl::in_place_t, Args &&...);
+
+  template <typename U, typename... Args>
+  explicit StatusOr(absl::in_place_t, std::initializer_list<U>, Args &&...);
+
+  template <
+      typename U = T,
+      absl::enable_if_t<
+          absl::conjunction<
+              internal_statusor::IsDirectInitializationValid<T, U &&>,
+              std::is_constructible<T, U &&>, std::is_convertible<U &&, T>,
+              absl::disjunction<
+                  std::is_same<absl::remove_cv_t<absl::remove_reference_t<U>>,
+                               T>,
+                  absl::conjunction<
+                      absl::negation<std::is_convertible<U &&, absl::Status>>,
+                      absl::negation<
+                          internal_statusor::HasConversionOperatorToStatusOr<
+                              T, U &&>>>>>::value,
+          int> = 0>
+  StatusOr(U &&);
+
+  template <
+      typename U = T,
+      absl::enable_if_t<
+          absl::conjunction<
+              internal_statusor::IsDirectInitializationValid<T, U &&>,
+              absl::disjunction<
+                  std::is_same<absl::remove_cv_t<absl::remove_reference_t<U>>,
+                               T>,
+                  absl::conjunction<
+                      absl::negation<std::is_constructible<absl::Status, U &&>>,
+                      absl::negation<
+                          internal_statusor::HasConversionOperatorToStatusOr<
+                              T, U &&>>>>,
+              std::is_constructible<T, U &&>,
+              absl::negation<std::is_convertible<U &&, T>>>::value,
+          int> = 0>
+  explicit StatusOr(U &&);
+
+  bool ok() const;
+
+  const Status &status() const & { return status_; }
+  Status status() &&;
+
+  using StatusOr::OperatorBase::value;
+
+  const T &ValueOrDie() const &;
+  T &ValueOrDie() &;
+  const T &&ValueOrDie() const &&;
+  T &&ValueOrDie() &&;
+
+  using StatusOr::OperatorBase::operator*;
+  using StatusOr::OperatorBase::operator->;
+
+  template <typename U> T value_or(U &&default_value) const &;
+  template <typename U> T value_or(U &&default_value) &&;
+
+  template <typename... Args> T &emplace(Args &&...args);
+
+  template <
+      typename U, typename... Args,
+      absl::enable_if_t<std::is_constructible<T, std::initializer_list<U> &,
+                                              Args &&...>::value,
+                        int> = 0>
+  T &emplace(std::initializer_list<U> ilist, Args &&...args);
+
+private:
+  absl::Status status_;
+};
+
+template <typename T>
+bool operator==(const StatusOr<T> &lhs, const StatusOr<T> &rhs);
+
+template <typename T>
+bool operator!=(const StatusOr<T> &lhs, const StatusOr<T> &rhs);
+
+} // namespace absl
+
+#endif // STATUSOR_H_
+)cc";
+
+static constexpr char StdVectorHeader[] = R"cc(
+#ifndef STD_VECTOR_H
+#define STD_VECTOR_H
+namespace std {
+template <class T> struct allocator {
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  typedef T *pointer;
+  typedef const T *const_pointer;
+  typedef T value_type;
+
+  T *allocate(size_t n);
+};
+
+template <class Alloc> struct allocator_traits {
+  typedef Alloc allocator_type;
+  typedef typename allocator_type::value_type value_type;
+  typedef typename allocator_type::pointer pointer;
+  typedef typename allocator_type::const_pointer const_pointer;
+  typedef typename allocator_type::difference_type difference_type;
+  typedef typename allocator_type::size_type size_type;
+};
+
+template <typename T, class Allocator = allocator<T>> class vector {
+public:
+  using value_type = T;
+  using size_type = typename allocator_traits<Allocator>::size_type;
+
+  // Constructors.
+  vector() {}
+  vector(size_type, const Allocator & = Allocator()) {}
+  vector(initializer_list<T> initializer_list,
+         const Allocator & = Allocator()) {}
+  vector(const vector &vector) {}
+  ~vector();
+
+  // Modifiers.
+  void push_back(const T &value);
+  void push_back(T &&value);
+  template <typename... Args> T &emplace_back(Args &&...args);
+
+  // Iterators
+  class InputIterator {
+  public:
+    InputIterator(const InputIterator &);
+    ~InputIterator();
+    InputIterator &operator=(const InputIterator &);
+    InputIterator &operator++();
+    T &operator*() const;
+    bool operator!=(const InputIterator &) const;
+    bool operator==(const InputIterator &) const;
+  };
+  typedef InputIterator iterator;
+  typedef const InputIterator const_iterator;
+  iterator begin() noexcept;
+  const_iterator begin() const noexcept;
+  const_iterator cbegin() const noexcept;
+  iterator end() noexcept;
+  const_iterator end() const noexcept;
+  const_iterator cend() const noexcept;
+  T *data() noexcept;
+  const T *data() const noexcept;
+  T &operator[](int n);
+  const T &operator[](int n) const;
+  T &at(int n);
+  const T &at(int n) const;
+  size_t size() const;
+};
+} // namespace std
+#endif // STD_VECTOR_H
+)cc";
+
+static constexpr char StdPairHeader[] = R"cc(
+#ifndef STD_PAIR_H
+#define STD_PAIR_H
+namespace std {
+template <class T1, class T2> struct pair {
+  T1 first;
+  T2 second;
+
+  typedef T1 first_type;
+  typedef T2 second_type;
+
+  constexpr pair();
+
+  template <class U1, class U2> pair(pair<U1, U2> &&p);
+
+  template <class U1, class U2> pair(U1 &&x, U2 &&y);
+};
+
+template <class T1, class T2> pair<T1, T2> make_pair(T1 &&t1, T2 &&t2);
+} // namespace std
+#endif // STD_PAIR_H
+)cc";
+
+constexpr const char AbslLogHeader[] = R"cc(
+#ifndef ABSL_LOG_H
+#define ABSL_LOG_H
+
+#include "std_pair.h"
+
+namespace absl {
+
+#define ABSL_PREDICT_FALSE(x) (__builtin_expect(false || (x), false))
+#define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))
+
+namespace log_internal {
+class LogMessage {
+public:
+  LogMessage();
+  LogMessage &stream();
+  LogMessage &InternalStream();
+  LogMessage &WithVerbosity(int verboselevel);
+  template <typename T> LogMessage &operator<<(const T &);
+};
+class LogMessageFatal : public LogMessage {
+public:
+  LogMessageFatal();
+  ~LogMessageFatal() __attribute__((noreturn));
+};
+class LogMessageQuietlyFatal : public LogMessage {
+public:
+  LogMessageQuietlyFatal();
+  ~LogMessageQuietlyFatal() __attribute__((noreturn));
+};
+class Voidify final {
+public:
+  // This has to be an operator with a precedence lower than << but higher
+  // than
+  // ?:
+  template <typename T> void operator&&(const T &) const && {}
+};
+} // namespace log_internal
+} // namespace absl
+
+#ifndef NULL
+#define NULL __null
+#endif
+extern "C" void abort() {}
+#define ABSL_LOG_INTERNAL_LOG_INFO ::absl::log_internal::LogMessage()
+#define ABSL_LOG_INTERNAL_LOG_WARNING ::absl::log_internal::LogMessage()
+#define ABSL_LOG_INTERNAL_LOG_ERROR ::absl::log_internal::LogMessage()
+#define ABSL_LOG_INTERNAL_LOG_FATAL ::absl::log_internal::LogMessageFatal()
+#define ABSL_LOG_INTERNAL_LOG_QFATAL                                           \
+  ::absl::log_internal::LogMessageQuietlyFatal()
+#define LOG(severity) ABSL_LOG_INTERNAL_LOG_##severity.InternalStream()
+
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(lit) lit
+
+#define ABSL_LOG_INTERNAL_STATELESS_CONDITION(condition)                       \
+  switch (0)                                                                   \
+  case 0:                                                                      \
+  default:                                                                     \
+    !(condition) ? (void)0 : ::absl::log_internal::Voidify() &&
+
+#define ABSL_LOG_INTERNAL_CONDITION_INFO(type, condition)                      \
+  ABSL_LOG_INTERNAL_##type##_CONDITION(condition)
+
+#define ABSL_LOG_INTERNAL_CONDITION_FATAL(type, condition)                     \
+  ABSL_LOG_INTERNAL_##type##_CONDITION(condition)
+
+#define ABSL_LOG_INTERNAL_CONDITION_QFATAL(type, condition)                    \
+  ABSL_LOG_INTERNAL_##type##_CONDITION(condition)
+
+#define ABSL_CHECK_IMPL(condition, condition_text)                             \
+  ABSL_LOG_INTERNAL_CONDITION_FATAL(STATELESS,                                 \
+                                    ABSL_PREDICT_FALSE(!(condition)))          \
+  ABSL_LOG_INTERNAL_CHECK(condition_text).InternalStream()
+
+#define ABSL_QCHECK_IMPL(condition, condition_text)                            \
+  ABSL_LOG_INTERNAL_CONDITION_QFATAL(STATELESS,                                \
+                                     ABSL_PREDICT_FALSE(!(condition)))         \
+  ABSL_LOG_INTERNAL_QCHECK(condition_text).InternalStream()
+
+#define CHECK(condition) ABSL_CHECK_IMPL((condition), #condition)
+#define DCHECK(condition) CHECK(condition)
+#define QCHECK(condition) ABSL_QCHECK_IMPL((condition), #condition)
+
+#define ABSL_LOG_INTERNAL_MAX_LOG_VERBOSITY_CHECK(x)
+
+namespace absl {
+
+template <typename T> class StatusOr;
+class Status;
+
+namespace status_internal {
+std::string *MakeCheckFailString(const absl::Status *status,
+                                 const char *prefix);
+} // namespace status_internal
+
+namespace log_internal {
+template <class T> const T &GetReferenceableValue(const T &t);
+char GetReferenceableValue(char t);
+unsigned char GetReferenceableValue(unsigned char t);
+signed char GetReferenceableValue(signed char t);
+short GetReferenceableValue(short t);
+unsigned short GetReferenceableValue(unsigned short t);
+int GetReferenceableValue(int t);
+unsigned int GetReferenceableValue(unsigned int t);
+long GetReferenceableValue(long t);
+unsigned long GetReferenceableValue(unsigned long t);
+long long GetReferenceableValue(long long t);
+unsigned long long GetReferenceableValue(unsigned long long t);
+const absl::Status *AsStatus(const absl::Status &s);
+template <typename T> const absl::Status *AsStatus(const absl::StatusOr<T> &s);
+} // namespace log_internal
+} // namespace absl
+// TODO(tkd): this still doesn't allow operator<<, unlike the real CHECK_
+// macros.
+#define ABSL_LOG_INTERNAL_CHECK_OP(name, op, val1, val2)                       \
+  while (char *_result = ::absl::log_internal::name##Impl(                     \
+             ::absl::log_internal::GetReferenceableValue(val1),                \
+             ::absl::log_internal::GetReferenceableValue(val2),                \
+             #val1 " " #op " " #val2))                                         \
+  (void)0
+#define ABSL_LOG_INTERNAL_QCHECK_OP(name, op, val1, val2)                      \
+  while (char *_result = ::absl::log_internal::name##Impl(                     \
+             ::absl::log_internal::GetReferenceableValue(val1),                \
+             ::absl::log_internal::GetReferenceableValue(val2),                \
+             #val1 " " #op " " #val2))                                         \
+  (void)0
+namespace absl {
+namespace log_internal {
+template <class T1, class T2>
+char *Check_NEImpl(const T1 &v1, const T2 &v2, const char *names);
+template <class T1, class T2>
+char *Check_EQImpl(const T1 &v1, const T2 &v2, const char *names);
+template <class T1, class T2>
+char *Check_LTImpl(const T1 &v1, const T2 &v2, const char *names);
+
+#define CHECK_EQ(a, b) ABSL_LOG_INTERNAL_CHECK_OP(Check_EQ, ==, a, b)
+#define CHECK_NE(a, b) ABSL_LOG_INTERNAL_CHECK_OP(Check_NE, !=, a, b)
+#define CHECK_LT(a, b) ABSL_LOG_INTERNAL_CHECK_OP(Check_EQ, <, a, b)
+
+#define QCHECK_EQ(a, b) ABSL_LOG_INTERNAL_QCHECK_OP(Check_EQ, ==, a, b)
+#define QCHECK_NE(a, b) ABSL_LOG_INTERNAL_QCHECK_OP(Check_NE, !=, a, b)
+} // namespace log_internal
+} // namespace absl
+
+#define CHECK_NOTNULL(x) CHECK((x) != nullptr)
+
+#define ABSL_LOG_INTERNAL_CHECK(failure_message)                               \
+  ::absl::log_internal::LogMessageFatal()
+#define ABSL_LOG_INTERNAL_QCHECK(failure_message)                              \
+  ::absl::log_internal::LogMessageQuietlyFatal()
+#define ABSL_LOG_INTERNAL_CHECK_OK(val)                                        \
+  for (::std::pair<const ::absl::Status *, ::std::string *>                    \
+           absl_log_internal_check_ok_goo;                                     \
+       absl_log_internal_check_ok_goo.first =                                  \
+           ::absl::log_internal::AsStatus(val),                                \
+       absl_log_internal_check_ok_goo.second =                                 \
+           ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok())       \
+               ? nullptr                                                       \
+               : ::absl::status_internal::MakeCheckFailString(                 \
+                     absl_log_internal_check_ok_goo.first,                     \
+                     ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(#val " is OK")),   \
+       !ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok());)        \
+  ABSL_LOG_INTERNAL_CHECK(*absl_log_internal_check_ok_goo.second)              \
+      .InternalStream()
+#define ABSL_LOG_INTERNAL_QCHECK_OK(val)                                       \
+  for (::std::pair<const ::absl::Status *, ::std::string *>                    \
+           absl_log_internal_check_ok_goo;                                     \
+       absl_log_internal_check_ok_goo.first =                                  \
+           ::absl::log_internal::AsStatus(val),                                \
+       absl_log_internal_check_ok_goo.second =                                 \
+           ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok())       \
+               ? nullptr                                                       \
+               : ::absl::status_internal::MakeCheckFailString(                 \
+                     absl_log_internal_check_ok_goo.first,                     \
+                     ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(#val " is OK")),   \
+       !ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok());)        \
+  ABSL_LOG_INTERNAL_QCHECK(*absl_log_internal_check_ok_goo.second)             \
+      .InternalStream()
+
+#define CHECK_OK(val) ABSL_LOG_INTERNAL_CHECK_OK(val)
+#define DCHECK_OK(val) ABSL_LOG_INTERNAL_CHECK_OK(val)
+#define QCHECK_OK(val) ABSL_LOG_INTERNAL_QCHECK_OK(val)
+
+#endif // ABSL_LOG_H
+)cc";
+
+constexpr const char TestingDefsHeader[] = R"cc(
+#pragma clang system_header
+
+#ifndef TESTING_DEFS_H
+#define TESTING_DEFS_H
+
+#include "absl_type_traits.h"
+#include "std_initializer_list.h"
+#include "std_string.h"
+#include "std_type_traits.h"
+#include "std_utility.h"
+
+namespace testing {
+struct AssertionResult {
+  template <typename T>
+  explicit AssertionResult(const T &res, bool enable_if = true) {}
+  ~AssertionResult();
+  operator bool() const;
+  template <typename T> AssertionResult &operator<<(const T &value);
+  const char *failure_message() const;
+};
+
+class TestPartResult {
+public:
+  enum Type { kSuccess, kNonFatalFailure, kFatalFailure, kSkip };
+};
+
+class Test {
+public:
+  virtual ~Test() = default;
+
+protected:
+  virtual void SetUp() {}
+};
+
+class Message {
+public:
+  template <typename T> Message &operator<<(const T &val);
+};
+
+namespace internal {
+class AssertHelper {
+public:
+  AssertHelper(TestPartResult::Type type, const char *file, int line,
+               const char *message);
+  void operator=(const Message &message) const;
+};
+
+class EqHelper {
+public:
+  template <typename T1, typename T2>
+  static AssertionResult Compare(const char *lhx, const char *rhx,
+                                 const T1 &lhs, const T2 &rhs);
+};
+
+#define GTEST_IMPL_CMP_HELPER_(op_name)                                        \
+  template <typename T1, typename T2>                                          \
+  AssertionResult CmpHelper##op_name(const char *expr1, const char *expr2,     \
+                                     const T1 &val1, const T2 &val2);
+
+GTEST_IMPL_CMP_HELPER_(NE)
+GTEST_IMPL_CMP_HELPER_(LE)
+GTEST_IMPL_CMP_HELPER_(LT)
+GTEST_IMPL_CMP_HELPER_(GE)
+GTEST_IMPL_CMP_HELPER_(GT)
+
+#undef GTEST_IMPL_CMP_HELPER_
+
+std::string GetBoolAssertionFailureMessage(
+    const AssertionResult &assertion_result, const char *expression_text,
+    const char *actual_predicate_value, const char *expected_predicate_value);
+
+template <typename M> class PredicateFormatterFromMatcher {
+public:
+  template <typename T>
+  AssertionResult operator()(const char *value_text, const T &x) const;
+};
+
+template <typename M>
+inline PredicateFormatterFromMatcher<M>
+MakePredicateFormatterFromMatcher(M matcher) {
+  return PredicateFormatterFromMatcher<M>();
+}
+} // namespace internal
+
+namespace status {
+namespace internal_status {
+class IsOkMatcher {};
+
+class StatusIsMatcher {};
+
+class CanonicalStatusIsMatcher {};
+
+template <typename M> class IsOkAndHoldsMatcher {};
+
+} // namespace internal_status
+
+internal_status::IsOkMatcher IsOk();
+
+template <typename StatusCodeMatcher>
+internal_status::StatusIsMatcher StatusIs(StatusCodeMatcher &&code_matcher);
+
+template <typename StatusCodeMatcher>
+internal_status::CanonicalStatusIsMatcher
+CanonicalStatusIs(StatusCodeMatcher &&code_matcher);
+
+template <typename InnerMatcher>
+internal_status::IsOkAndHoldsMatcher<InnerMatcher> IsOkAndHolds(InnerMatcher m);
+} // namespace status
+
+class IsTrueMatcher {};
+IsTrueMatcher IsTrue();
+
+class IsFalseMatcher {};
+IsFalseMatcher IsFalse();
+
+} // namespace testing
+
+namespace absl_testing {
+namespace status_internal {
+class IsOkMatcher {};
+template <typename M> class IsOkAndHoldsMatcher {};
+class StatusIsMatcher {};
+class CanonicalStatusIsMatcher {};
+} // namespace status_internal
+status_internal::IsOkMatcher IsOk();
+template <typename InnerMatcher>
+status_internal::IsOkAndHoldsMatcher<InnerMatcher> IsOkAndHolds(InnerMatcher m);
+template <typename StatusCodeMatcher>
+status_internal::StatusIsMatcher StatusIs(StatusCodeMatcher &&code_matcher);
+
+template <typename StatusCodeMatcher>
+status_internal::CanonicalStatusIsMatcher
+CanonicalStatusIs(StatusCodeMatcher &&code_matcher);
+} // namespace absl_testing
+
+using testing::AssertionResult;
+#define EXPECT_TRUE(x)                                                         \
+  switch (0)                                                                   \
+  case 0:                                                                      \
+  default:                                                                     \
+    if (const AssertionResult gtest_ar_ = AssertionResult(x)) {                \
+    } else /* NOLINT */                                                        \
+      ::testing::Message()
+#define EXPECT_FALSE(x) EXPECT_TRUE(!(x))
+
+#define GTEST_AMBIGUOUS_ELSE_BLOCKER_                                          \
+  switch (0)                                                                   \
+  case 0:                                                                      \
+  default:
+
+#define GTEST_ASSERT_(expression, on_failure)                                  \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                                \
+  if (const ::testing::AssertionResult gtest_ar = (expression))                \
+    ;                                                                          \
+  else                                                                         \
+    on_failure(gtest_ar.failure_message())
+#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)                       \
+  GTEST_ASSERT_(pred_format(#v1, v1), on_failure)
+#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)                   \
+  GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), on_failure)
+#define GTEST_MESSAGE_AT_(file, line, message, result_type)                    \
+  ::testing::internal::AssertHelper(result_type, file, line, message) =        \
+      ::testing::Message()
+#define GTEST_MESSAGE_(message, result_type)                                   \
+  GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)
+#define GTEST_FATAL_FAILURE_(message)                                          \
+  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)
+#define GTEST_NONFATAL_FAILURE_(message)                                       \
+  GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)
+
+#define ASSERT_PRED_FORMAT1(pred_format, v1)                                   \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_)
+#define ASSERT_PRED_FORMAT2(pred_format, v1, v2)                               \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_)
+
+#define ASSERT_THAT(value, matcher)                                            \
+  ASSERT_PRED_FORMAT1(                                                         \
+      ::testing::internal::MakePredicateFormatterFromMatcher(matcher), value)
+#define ASSERT_OK(x) ASSERT_THAT(x, ::testing::status::IsOk())
+
+#define EXPECT_PRED_FORMAT1(pred_format, v1)                                   \
+  GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2)                               \
+  GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_THAT(value, matcher)                                            \
+  EXPECT_PRED_FORMAT1(                                                         \
+      ::testing::internal::MakePredicateFormatterFromMatcher(matcher), value)
+#define EXPECT_OK(expression) EXPECT_THAT(expression, ::testing::status::IsOk())
+
+#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail)          \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                                \
+  if (const ::testing::AssertionResult gtest_ar_ =                             \
+          ::testing::AssertionResult(expression))                              \
+    ;                                                                          \
+  else                                                                         \
+    fail(::testing::internal::GetBoolAssertionFailureMessage(                  \
+             gtest_ar_, text, #actual, #expected)                              \
+             .c_str())
+#define GTEST_ASSERT_TRUE(condition)                                           \
+  GTEST_TEST_BOOLEAN_(condition, #condition, false, true, GTEST_FATAL_FAILURE_)
+#define GTEST_ASSERT_FALSE(condition)                                          \
+  GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false,                   \
+                      GTEST_FATAL_FAILURE_)
+#define ASSERT_TRUE(condition) GTEST_ASSERT_TRUE(condition)
+#define ASSERT_FALSE(condition) GTEST_ASSERT_FALSE(condition)
+
+#define EXPECT_EQ(x, y)                                                        \
+  EXPECT_PRED_FORMAT2(::testing::internal::EqHelper::Compare, x, y)
+#define EXPECT_NE(x, y)                                                        \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, x, y)
+#define EXPECT_LT(x, y)                                                        \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, x, y)
+#define EXPECT_GT(x, y)                                                        \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, x, y)
+#define EXPECT_LE(x, y)                                                        \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, x, y)
+#define EXPECT_GE(x, y)                                                        \
+  EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, x, y)
+
+#define ASSERT_EQ(x, y)                                                        \
+  ASSERT_PRED_FORMAT2(testing::internal::EqHelper::Compare, x, y)
+#define ASSERT_NE(x, y)                                                        \
+  ASSERT_PRED_FORMAT2(testing::internal::CmpHelperNE, x, y)
+#define ASSERT_LT(x, y)                                                        \
+  ASSERT_PRED_FORMAT2(testing::internal::CmpHelperLT, x, y)
+#define ASSERT_GT(x, y)                                                        \
+  ASSERT_PRED_FORMAT2(testing::internal::CmpHelperGT, x, y)
+#define ASSERT_LE(x, y)                                                        \
+  ASSERT_PRED_FORMAT2(testing::internal::CmpHelperLE, x, y)
+#define ASSERT_GE(x, y)                                                        \
+  ASSERT_PRED_FORMAT2(testing::internal::CmpHelperGE, x, y)
+
+#endif // TESTING_DEFS_H
+)cc";
+
 std::vector<std::pair<std::string, std::string>> getMockHeaders() {
   std::vector<std::pair<std::string, std::string>> Headers;
   Headers.emplace_back("cstddef.h", CStdDefHeader);
@@ -1251,6 +2243,12 @@ std::vector<std::pair<std::string, std::string>> getMockHeaders() {
   Headers.emplace_back("absl_type_traits.h", AbslTypeTraitsHeader);
   Headers.emplace_back("absl_optional.h", AbslOptionalHeader);
   Headers.emplace_back("base_optional.h", BaseOptionalHeader);
+  Headers.emplace_back("std_vector.h", StdVectorHeader);
+  Headers.emplace_back("std_pair.h", StdPairHeader);
+  Headers.emplace_back("status_defs.h", StatusDefsHeader);
+  Headers.emplace_back("statusor_defs.h", StatusOrDefsHeader);
+  Headers.emplace_back("absl_log.h", AbslLogHeader);
+  Headers.emplace_back("testing_defs.h", TestingDefsHeader);
   return Headers;
 }
 
diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt
index 8c8baa5..f20c8db 100644
--- a/clang/unittests/Basic/CMakeLists.txt
+++ b/clang/unittests/Basic/CMakeLists.txt
@@ -6,6 +6,7 @@ add_distinct_clang_unittest(BasicTests
   DiagnosticTest.cpp
   FileEntryTest.cpp
   FileManagerTest.cpp
+  LangOptionsTest.cpp
   LineOffsetMappingTest.cpp
   OffloadArchTest.cpp
   SanitizersTest.cpp
diff --git a/clang/unittests/Basic/LangOptionsTest.cpp b/clang/unittests/Basic/LangOptionsTest.cpp
new file mode 100644
index 0000000..0d7d5ec
--- /dev/null
+++ b/clang/unittests/Basic/LangOptionsTest.cpp
@@ -0,0 +1,56 @@
+//===- unittests/Basic/LangOptionsTest.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/LangOptions.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+
+namespace {
+TEST(LangOptsTest, CStdLang) {
+  LangOptions opts;
+  EXPECT_FALSE(opts.getCLangStd());
+  opts.GNUMode = 0;
+  opts.Digraphs = 1;
+  EXPECT_EQ(opts.getCLangStd(), 199409);
+  opts.C99 = 1;
+  EXPECT_EQ(opts.getCLangStd(), 199901);
+  opts.C11 = 1;
+  EXPECT_EQ(opts.getCLangStd(), 201112);
+  opts.C17 = 1;
+  EXPECT_EQ(opts.getCLangStd(), 201710);
+  opts.C23 = 1;
+  EXPECT_EQ(opts.getCLangStd(), 202311);
+  opts.C2y = 1;
+  EXPECT_EQ(opts.getCLangStd(), 202400);
+
+  EXPECT_FALSE(opts.getCPlusPlusLangStd());
+}
+
+TEST(LangOptsTest, CppStdLang) {
+  LangOptions opts;
+  EXPECT_FALSE(opts.getCPlusPlusLangStd());
+  opts.CPlusPlus = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 199711);
+  opts.CPlusPlus11 = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 201103);
+  opts.CPlusPlus14 = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 201402);
+  opts.CPlusPlus17 = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 201703);
+  opts.CPlusPlus20 = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 202002);
+  opts.CPlusPlus23 = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 202302);
+  opts.CPlusPlus26 = 1;
+  EXPECT_EQ(opts.getCPlusPlusLangStd(), 202400);
+
+  EXPECT_FALSE(opts.getCLangStd());
+}
+} // namespace
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 450c34f..b9ad930 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -19539,6 +19539,15 @@ TEST_F(FormatTest, AlignConsecutiveAssignments) {
       "int j      = 2;",
       Alignment);
 
+  verifyFormat("int abcdefghijk = 111;\n"
+               "auto lambda     = [] {\n"
+               "  int c = call(1, //\n"
+               "               2, //\n"
+               "               3, //\n"
+               "               4);\n"
+               "};",
+               Alignment);
+
   verifyFormat("template <typename T, typename T_0 = very_long_type_name_0,\n"
                "          typename B   = very_long_type_name_1,\n"
                "          typename T_2 = very_long_type_name_2>\n"
@@ -19577,6 +19586,12 @@ TEST_F(FormatTest, AlignConsecutiveAssignments) {
                Alignment);
   verifyFormat("auto aaaaaaaaaaaaaaaaaaaaa = {};\n"
                "auto b                     = g([] {\n"
+               "  return \"Hello \"\n"
+               "         \"World\";\n"
+               "});",
+               Alignment);
+  verifyFormat("auto aaaaaaaaaaaaaaaaaaaaa = {};\n"
+               "auto b                     = g([] {\n"
                "  f();\n"
                "  return;\n"
                "});",
@@ -19599,12 +19614,11 @@ TEST_F(FormatTest, AlignConsecutiveAssignments) {
                "           ccc ? aaaaa : bbbbb,\n"
                "           dddddddddddddddddddddddddd);",
                Alignment);
-  // FIXME: https://llvm.org/PR53497
-  // verifyFormat("auto aaaaaaaaaaaa = f();\n"
-  //              "auto b            = f(aaaaaaaaaaaaaaaaaaaaaaaaa,\n"
-  //              "    ccc ? aaaaa : bbbbb,\n"
-  //              "    dddddddddddddddddddddddddd);",
-  //              Alignment);
+  verifyFormat("auto aaaaaaaaaaaa = f();\n"
+               "auto b            = f(aaaaaaaaaaaaaaaaaaaaaaaaa,\n"
+               "                      ccc ? aaaaa : bbbbb,\n"
+               "                      dddddddddddddddddddddddddd);",
+               Alignment);
 
   // Confirm proper handling of AlignConsecutiveAssignments with
   // BinPackArguments.
@@ -20192,6 +20206,11 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) {
                "    i = 3    //\n"
                "};",
                Alignment);
+  // When assignments are nested, each level should be aligned.
+  verifyFormat("float i2 = 0;\n"
+               "auto  v  = type{i2 = 1, //\n"
+               "                i  = 3};",
+               Alignment);
   Alignment.AlignConsecutiveAssignments.Enabled = false;
 
   verifyFormat(
@@ -20681,6 +20700,21 @@ TEST_F(FormatTest, AlignWithLineBreaks) {
                "}",
                Style);
 
+  verifyFormat("void foo() {\n"
+               "  int    myVar = 5;\n"
+               "  double x     = 3.14;\n"
+               "  auto   str   = (\"Hello \"\n"
+               "                  \"World\");\n"
+               "  auto   s     = (\"Hello \"\n"
+               "                  \"Again\");\n"
+               "}",
+               Style);
+
+  verifyFormat("A    B       = {\"Hello \"\n"
+               "                \"World\"};\n"
+               "BYTE payload = 2;",
+               Style);
+
   // clang-format off
   verifyFormat("void foo() {\n"
                "  const int  capacityBefore = Entries.capacity();\n"
@@ -20763,6 +20797,28 @@ TEST_F(FormatTest, AlignWithInitializerPeriods) {
                "}",
                Style);
 
+  // The lines inside the braces are supposed to be indented by
+  // BracedInitializerIndentWidth from the start of the line. They should not
+  // move with the opening brace.
+  verifyFormat("void foo2(void) {\n"
+               "  BYTE p[1] = 1;\n"
+               "  A B       = {\n"
+               "      .one_foooooooooooooooo = 2,\n"
+               "      .two_fooooooooooooo    = 3,\n"
+               "      .three_fooooooooooooo  = 4,\n"
+               "  };\n"
+               "  BYTE payload = 2;\n"
+               "}",
+               Style);
+
+  verifyFormat("auto aaaaaaaaaaaaaaaaaaaaa = {};\n"
+               "auto b                     = g([] {\n"
+               "  x = {.one_foooooooooooooooo = 2, //\n"
+               "       .two_fooooooooooooo    = 3, //\n"
+               "       .three_fooooooooooooo  = 4};\n"
+               "});",
+               Style);
+
   Style.AlignConsecutiveAssignments.Enabled = false;
   Style.AlignConsecutiveDeclarations.Enabled = true;
   verifyFormat("void foo3(void) {\n"
diff --git a/clang/unittests/StaticAnalyzer/SValTest.cpp b/clang/unittests/StaticAnalyzer/SValTest.cpp
index 1f4a18b..db4b01b 100644
--- a/clang/unittests/StaticAnalyzer/SValTest.cpp
+++ b/clang/unittests/StaticAnalyzer/SValTest.cpp
@@ -302,13 +302,13 @@ void foo(int x) {
   ASSERT_FALSE(B.getType(Context).isNull());
   const auto *BRecordType = dyn_cast<RecordType>(B.getType(Context));
   ASSERT_NE(BRecordType, nullptr);
-  EXPECT_EQ("TestStruct", BRecordType->getOriginalDecl()->getName());
+  EXPECT_EQ("TestStruct", BRecordType->getDecl()->getName());
 
   SVal C = getByName("c");
   ASSERT_FALSE(C.getType(Context).isNull());
   const auto *CRecordType = dyn_cast<RecordType>(C.getType(Context));
   ASSERT_NE(CRecordType, nullptr);
-  EXPECT_EQ("TestUnion", CRecordType->getOriginalDecl()->getName());
+  EXPECT_EQ("TestUnion", CRecordType->getDecl()->getName());
 
   auto D = getByName("d").getAs<nonloc::CompoundVal>();
   ASSERT_TRUE(D.has_value());
@@ -322,7 +322,7 @@ void foo(int x) {
   ASSERT_FALSE(LDT.isNull());
   const auto *DRecordType = dyn_cast<RecordType>(LDT);
   ASSERT_NE(DRecordType, nullptr);
-  EXPECT_EQ("TestStruct", DRecordType->getOriginalDecl()->getName());
+  EXPECT_EQ("TestStruct", DRecordType->getDecl()->getName());
 }
 
 SVAL_TEST(GetStringType, R"(
@@ -351,7 +351,7 @@ void TestClass::foo() {
   ASSERT_NE(APtrTy, nullptr);
   const auto *ARecordType = dyn_cast<RecordType>(APtrTy->getPointeeType());
   ASSERT_NE(ARecordType, nullptr);
-  EXPECT_EQ("TestClass", ARecordType->getOriginalDecl()->getName());
+  EXPECT_EQ("TestClass", ARecordType->getDecl()->getName());
 }
 
 SVAL_TEST(GetFunctionPtrType, R"(
diff --git a/clang/unittests/Tooling/LookupTest.cpp b/clang/unittests/Tooling/LookupTest.cpp
index 4c49ebe..ed6f5d4 100644
--- a/clang/unittests/Tooling/LookupTest.cpp
+++ b/clang/unittests/Tooling/LookupTest.cpp
@@ -200,9 +200,9 @@ TEST(LookupTest, replaceNestedClassName) {
   Visitor.OnRecordTypeLoc = [&](RecordTypeLoc Type) {
     // Filter Types by name since there are other `RecordTypeLoc` in the test
     // file.
-    if (Type.getOriginalDecl()->getQualifiedNameAsString() == "a::b::Foo") {
-      EXPECT_EQ("x::Bar", replaceTypeLoc(Type.getOriginalDecl(),
-                                         Type.getBeginLoc(), "::a::x::Bar"));
+    if (Type.getDecl()->getQualifiedNameAsString() == "a::b::Foo") {
+      EXPECT_EQ("x::Bar", replaceTypeLoc(Type.getDecl(), Type.getBeginLoc(),
+                                         "::a::x::Bar"));
     }
   };
   Visitor.runOver("namespace a { namespace b {\n"
@@ -227,9 +227,9 @@ TEST(LookupTest, replaceNestedClassName) {
   // `x::y::Foo` in c.cc [1], it should not make "Foo" at [0] ambiguous because
   // it's not visible at [0].
   Visitor.OnRecordTypeLoc = [&](RecordTypeLoc Type) {
-    if (Type.getOriginalDecl()->getQualifiedNameAsString() == "x::y::Old") {
-      EXPECT_EQ("Foo", replaceTypeLoc(Type.getOriginalDecl(),
-                                      Type.getBeginLoc(), "::x::Foo"));
+    if (Type.getDecl()->getQualifiedNameAsString() == "x::y::Old") {
+      EXPECT_EQ("Foo",
+                replaceTypeLoc(Type.getDecl(), Type.getBeginLoc(), "::x::Foo"));
     }
   };
   Visitor.runOver(R"(
diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp
index 88cebb7..587a00d 100644
--- a/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp
+++ b/clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp
@@ -24,7 +24,7 @@ public:
   bool VisitRecordTypeLoc(RecordTypeLoc RTL) override {
     if (!RTL)
       return true;
-    Match(RTL.getOriginalDecl()->getName(), RTL.getNameLoc());
+    Match(RTL.getDecl()->getName(), RTL.getNameLoc());
     return true;
   }
 };
diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/NestedNameSpecifiers.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/NestedNameSpecifiers.cpp
index 4181cd2..120b14b 100644
--- a/clang/unittests/Tooling/RecursiveASTVisitorTests/NestedNameSpecifiers.cpp
+++ b/clang/unittests/Tooling/RecursiveASTVisitorTests/NestedNameSpecifiers.cpp
@@ -18,7 +18,7 @@ public:
   bool VisitRecordTypeLoc(RecordTypeLoc RTL) override {
     if (!RTL)
       return true;
-    Match(RTL.getOriginalDecl()->getName(), RTL.getNameLoc());
+    Match(RTL.getDecl()->getName(), RTL.getNameLoc());
     return true;
   }
 
diff --git a/cmake/Modules/FindGRPC.cmake b/cmake/Modules/FindGRPC.cmake
index a559da4..2eec35b 100644
--- a/cmake/Modules/FindGRPC.cmake
+++ b/cmake/Modules/FindGRPC.cmake
@@ -3,21 +3,23 @@ option(ENABLE_GRPC_REFLECTION "Link to gRPC Reflection library" OFF)
 # FIXME(kirillbobyrev): Check if gRPC and Protobuf headers can be included at
 # configure time.
 find_package(Threads REQUIRED)
-if (GRPC_INSTALL_PATH)
-  # This setup requires gRPC to be built from sources using CMake and installed
-  # to ${GRPC_INSTALL_PATH} via -DCMAKE_INSTALL_PREFIX=${GRPC_INSTALL_PATH}.
-  # Libraries will be linked according to gRPC build policy which generates
-  # static libraries when BUILD_SHARED_LIBS is Off and dynamic libraries when
-  # it's On (NOTE: This is a variable passed to gRPC CMake build invocation,
-  # LLVM's BUILD_SHARED_LIBS has no effect).
-  set(protobuf_MODULE_COMPATIBLE TRUE)
-  find_package(Protobuf CONFIG REQUIRED HINTS ${GRPC_INSTALL_PATH})
-  message(STATUS "Using protobuf ${Protobuf_VERSION}")
-  find_package(gRPC CONFIG REQUIRED HINTS ${GRPC_INSTALL_PATH})
-  message(STATUS "Using gRPC ${gRPC_VERSION}")
 
-  include_directories(${Protobuf_INCLUDE_DIRS})
+# Prefer finding gPRC through CMakeConfig and a hint can be provided via
+# GRPC_INSTALL_PATH. This requires gRPC to be built and installed
+# to ${GRPC_INSTALL_PATH} via -DCMAKE_INSTALL_PREFIX=${GRPC_INSTALL_PATH}.
+# Libraries will be linked according to gRPC build policy which generates
+# static libraries when BUILD_SHARED_LIBS is Off and dynamic libraries when
+# it's On (NOTE: This is a variable passed to gRPC CMake build invocation,
+# LLVM's BUILD_SHARED_LIBS has no effect).
+# Package managers like Homebrew will also install Config.cmake and user can
+# specify GRPC_INSTALL_PATH or CMAKE_PREFIX_PATH to locate installed package.
+set(protobuf_MODULE_COMPATIBLE TRUE)
+find_package(Protobuf CONFIG HINTS ${GRPC_INSTALL_PATH})
+message(STATUS "Using protobuf ${Protobuf_VERSION}")
+find_package(gRPC CONFIG HINTS ${GRPC_INSTALL_PATH})
+message(STATUS "Using gRPC ${gRPC_VERSION}")
 
+if (Protobuf_FOUND AND gRPC_FOUND)
   # gRPC CMake CONFIG gives the libraries slightly odd names, make them match
   # the conventional system-installed names.
   set_target_properties(protobuf::libprotobuf PROPERTIES IMPORTED_GLOBAL TRUE)
@@ -32,10 +34,12 @@ if (GRPC_INSTALL_PATH)
   set(GRPC_CPP_PLUGIN $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
   set(PROTOC ${Protobuf_PROTOC_EXECUTABLE})
 else()
-  # This setup requires system-installed gRPC and Protobuf.
+  # Now fallback to system-installed gRPC and ProtoBuf.
   # We always link dynamically in this mode. While the static libraries are
   # usually installed, the CMake files telling us *which* static libraries to
   # link are not.
+  # FIXME: this path should not work on newer grpc versions and should be
+  # removed in favor of `find_package` implementation.
   if (NOT BUILD_SHARED_LIBS)
     message(NOTICE "gRPC and Protobuf will be linked dynamically. If you want static linking, build gRPC from sources with -DBUILD_SHARED_LIBS=Off.")
   endif()
@@ -44,55 +48,17 @@ else()
   if (NOT GRPC_CPP_PLUGIN OR NOT PROTOC)
     message(FATAL_ERROR "gRPC C++ Plugin and Protoc must be on $PATH for gRPC-enabled build.")
   endif()
-  # On macOS the libraries are typically installed via Homebrew and are not on
-  # the system path.
-  set(GRPC_OPTS "")
-  set(PROTOBUF_OPTS "")
-  set(GRPC_INCLUDE_PATHS "")
-  if (${APPLE})
-    find_program(HOMEBREW brew)
-    # If Homebrew is not found, the user might have installed libraries
-    # manually. Fall back to the system path.
-    if (HOMEBREW)
-      execute_process(COMMAND ${HOMEBREW} --prefix grpc
-        OUTPUT_VARIABLE GRPC_HOMEBREW_PATH
-        RESULT_VARIABLE GRPC_HOMEBREW_RETURN_CODE
-        OUTPUT_STRIP_TRAILING_WHITESPACE)
-      execute_process(COMMAND ${HOMEBREW} --prefix protobuf
-        OUTPUT_VARIABLE PROTOBUF_HOMEBREW_PATH
-        RESULT_VARIABLE PROTOBUF_HOMEBREW_RETURN_CODE
-        OUTPUT_STRIP_TRAILING_WHITESPACE)
-      execute_process(COMMAND ${HOMEBREW} --prefix abseil
-        OUTPUT_VARIABLE ABSL_HOMEBREW_PATH
-        RESULT_VARIABLE ABSL_HOMEBREW_RETURN_CODE
-        OUTPUT_STRIP_TRAILING_WHITESPACE)
-      # If either library is not installed via Homebrew, fall back to the
-      # system path.
-      if (GRPC_HOMEBREW_RETURN_CODE EQUAL "0")
-        list(APPEND GRPC_INCLUDE_PATHS ${GRPC_HOMEBREW_PATH}/include)
-        list(APPEND GRPC_OPTS PATHS ${GRPC_HOMEBREW_PATH}/lib NO_DEFAULT_PATH)
-      endif()
-      if (PROTOBUF_HOMEBREW_RETURN_CODE EQUAL "0")
-        list(APPEND GRPC_INCLUDE_PATHS ${PROTOBUF_HOMEBREW_PATH}/include)
-        list(APPEND PROTOBUF_OPTS PATHS ${PROTOBUF_HOMEBREW_PATH}/lib NO_DEFAULT_PATH)
-      endif()
-      if (ABSL_HOMEBREW_RETURN_CODE EQUAL "0")
-        list(APPEND GRPC_INCLUDE_PATHS ${ABSL_HOMEBREW_PATH}/include)
-      endif()
-    endif()
-  endif()
   if(NOT TARGET grpc++)
-    find_library(GRPC_LIBRARY grpc++ ${GRPC_OPTS} REQUIRED)
+    find_library(GRPC_LIBRARY grpc++ REQUIRED)
     add_library(grpc++ UNKNOWN IMPORTED GLOBAL)
     message(STATUS "Using grpc++: " ${GRPC_LIBRARY})
     set_target_properties(grpc++ PROPERTIES IMPORTED_LOCATION ${GRPC_LIBRARY})
-    target_include_directories(grpc++ INTERFACE ${GRPC_INCLUDE_PATHS})
     if (ENABLE_GRPC_REFLECTION)
-      find_library(GRPC_REFLECTION_LIBRARY grpc++_reflection ${GRPC_OPTS} REQUIRED)
+      find_library(GRPC_REFLECTION_LIBRARY grpc++_reflection REQUIRED)
       add_library(grpc++_reflection UNKNOWN IMPORTED GLOBAL)
       set_target_properties(grpc++_reflection PROPERTIES IMPORTED_LOCATION ${GRPC_REFLECTION_LIBRARY})
     endif()
-    find_library(PROTOBUF_LIBRARY protobuf ${PROTOBUF_OPTS} REQUIRED)
+    find_library(PROTOBUF_LIBRARY protobuf REQUIRED)
     message(STATUS "Using protobuf: " ${PROTOBUF_LIBRARY})
     add_library(protobuf UNKNOWN IMPORTED GLOBAL)
     set_target_properties(protobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_LIBRARY})
diff --git a/compiler-rt/lib/asan/asan_fake_stack.cpp b/compiler-rt/lib/asan/asan_fake_stack.cpp
index c3ed252..d3fa953 100644
--- a/compiler-rt/lib/asan/asan_fake_stack.cpp
+++ b/compiler-rt/lib/asan/asan_fake_stack.cpp
@@ -28,7 +28,7 @@ static const u64 kAllocaRedzoneMask = 31UL;
 // For small size classes inline PoisonShadow for better performance.
 ALWAYS_INLINE void SetShadow(uptr ptr, uptr size, uptr class_id, u64 magic) {
   CHECK(AddrIsAlignedByGranularity(ptr + size));
-  u64 *shadow = reinterpret_cast<u64*>(MemToShadow(ptr));
+  u64* shadow = reinterpret_cast<u64*>(MemToShadow(ptr));
   if (ASAN_SHADOW_SCALE == 3 && class_id <= 6) {
     // This code expects ASAN_SHADOW_SCALE=3.
     for (uptr i = 0; i < (((uptr)1) << class_id); i++) {
@@ -47,7 +47,7 @@ ALWAYS_INLINE void SetShadow(uptr ptr, uptr size, uptr class_id, u64 magic) {
   }
 }
 
-FakeStack *FakeStack::Create(uptr stack_size_log) {
+FakeStack* FakeStack::Create(uptr stack_size_log) {
   static uptr kMinStackSizeLog = 16;
   static uptr kMaxStackSizeLog = FIRST_32_SECOND_64(24, 28);
   if (stack_size_log < kMinStackSizeLog)
@@ -57,7 +57,7 @@ FakeStack *FakeStack::Create(uptr stack_size_log) {
   CHECK_LE(kMaxStackFrameSizeLog, stack_size_log);
   uptr size = RequiredSize(stack_size_log);
   uptr padded_size = size + kMaxStackFrameSize;
-  void *true_res = reinterpret_cast<void *>(
+  void* true_res = reinterpret_cast<void*>(
       flags()->uar_noreserve ? MmapNoReserveOrDie(padded_size, "FakeStack")
                              : MmapOrDie(padded_size, "FakeStack"));
   // GetFrame() requires the property that
@@ -66,20 +66,20 @@ FakeStack *FakeStack::Create(uptr stack_size_log) {
   // We didn't use MmapAlignedOrDieOnFatalError, because it requires that the
   // *size* is a power of 2, which is an overly strong condition.
   static_assert(alignof(FakeStack) <= kMaxStackFrameSize);
-  FakeStack *res = reinterpret_cast<FakeStack *>(
+  FakeStack* res = reinterpret_cast<FakeStack*>(
       RoundUpTo(
           (uptr)true_res + kFlagsOffset + SizeRequiredForFlags(stack_size_log),
           kMaxStackFrameSize) -
       kFlagsOffset - SizeRequiredForFlags(stack_size_log));
   res->true_start = true_res;
   res->stack_size_log_ = stack_size_log;
-  u8 *p = reinterpret_cast<u8 *>(res);
+  u8* p = reinterpret_cast<u8*>(res);
   VReport(1,
           "T%d: FakeStack created: %p -- %p stack_size_log: %zd; "
           "mmapped %zdK, noreserve=%d, true_start: %p, start of first frame: "
           "0x%zx\n",
-          GetCurrentTidOrInvalid(), (void *)p,
-          (void *)(p + FakeStack::RequiredSize(stack_size_log)), stack_size_log,
+          GetCurrentTidOrInvalid(), (void*)p,
+          (void*)(p + FakeStack::RequiredSize(stack_size_log)), stack_size_log,
           size >> 10, flags()->uar_noreserve, res->true_start,
           res->GetFrame(stack_size_log, /*class_id*/ 0, /*pos*/ 0));
   return res;
@@ -109,14 +109,14 @@ void FakeStack::PoisonAll(u8 magic) {
 #if !defined(_MSC_VER) || defined(__clang__)
 ALWAYS_INLINE USED
 #endif
-FakeFrame *FakeStack::Allocate(uptr stack_size_log, uptr class_id,
-                               uptr real_stack) {
+    FakeFrame* FakeStack::Allocate(uptr stack_size_log, uptr class_id,
+                                   uptr real_stack) {
   CHECK_LT(class_id, kNumberOfSizeClasses);
   if (needs_gc_)
     GC(real_stack);
-  uptr &hint_position = hint_position_[class_id];
+  uptr& hint_position = hint_position_[class_id];
   const int num_iter = NumberOfFrames(stack_size_log, class_id);
-  u8 *flags = GetFlags(stack_size_log, class_id);
+  u8* flags = GetFlags(stack_size_log, class_id);
   for (int i = 0; i < num_iter; i++) {
     uptr pos = ModuloNumberOfFrames(stack_size_log, class_id, hint_position++);
     // This part is tricky. On one hand, checking and setting flags[pos]
@@ -126,22 +126,24 @@ FakeFrame *FakeStack::Allocate(uptr stack_size_log, uptr class_id,
     // and so will not touch this particular byte. So, it is safe to do this
     // with regular non-atomic load and store (at least I was not able to make
     // this code crash).
-    if (flags[pos]) continue;
+    if (flags[pos])
+      continue;
     flags[pos] = 1;
-    FakeFrame *res = reinterpret_cast<FakeFrame *>(
-        GetFrame(stack_size_log, class_id, pos));
+    FakeFrame* res =
+        reinterpret_cast<FakeFrame*>(GetFrame(stack_size_log, class_id, pos));
     res->real_stack = real_stack;
     *SavedFlagPtr(reinterpret_cast<uptr>(res), class_id) = &flags[pos];
     return res;
   }
-  return nullptr; // We are out of fake stack.
+  return nullptr;  // We are out of fake stack.
 }
 
-uptr FakeStack::AddrIsInFakeStack(uptr ptr, uptr *frame_beg, uptr *frame_end) {
+uptr FakeStack::AddrIsInFakeStack(uptr ptr, uptr* frame_beg, uptr* frame_end) {
   uptr stack_size_log = this->stack_size_log();
   uptr beg = reinterpret_cast<uptr>(GetFrame(stack_size_log, 0, 0));
   uptr end = reinterpret_cast<uptr>(this) + RequiredSize(stack_size_log);
-  if (ptr < beg || ptr >= end) return 0;
+  if (ptr < beg || ptr >= end)
+    return 0;
   uptr class_id = (ptr - beg) >> stack_size_log;
   uptr base = beg + (class_id << stack_size_log);
   CHECK_LE(base, ptr);
@@ -153,9 +155,7 @@ uptr FakeStack::AddrIsInFakeStack(uptr ptr, uptr *frame_beg, uptr *frame_end) {
   return res;
 }
 
-void FakeStack::HandleNoReturn() {
-  needs_gc_ = true;
-}
+void FakeStack::HandleNoReturn() { needs_gc_ = true; }
 
 // Hack: The statement below is not true if we take into account sigaltstack or
 // makecontext. It should be possible to make GC to discard wrong stack frame if
@@ -170,7 +170,7 @@ void FakeStack::HandleNoReturn() {
 // We do it based on their 'real_stack' values -- everything that is lower
 // than the current real_stack is garbage.
 NOINLINE void FakeStack::GC(uptr real_stack) {
-  AsanThread *curr_thread = GetCurrentThread();
+  AsanThread* curr_thread = GetCurrentThread();
   if (!curr_thread)
     return;  // Try again when we have a thread.
   auto top = curr_thread->stack_top();
@@ -179,12 +179,13 @@ NOINLINE void FakeStack::GC(uptr real_stack) {
     return;  // Not the default stack.
 
   for (uptr class_id = 0; class_id < kNumberOfSizeClasses; class_id++) {
-    u8 *flags = GetFlags(stack_size_log(), class_id);
+    u8* flags = GetFlags(stack_size_log(), class_id);
     for (uptr i = 0, n = NumberOfFrames(stack_size_log(), class_id); i < n;
          i++) {
-      if (flags[i] == 0) continue;  // not allocated.
-      FakeFrame *ff = reinterpret_cast<FakeFrame *>(
-          GetFrame(stack_size_log(), class_id, i));
+      if (flags[i] == 0)
+        continue;  // not allocated.
+      FakeFrame* ff =
+          reinterpret_cast<FakeFrame*>(GetFrame(stack_size_log(), class_id, i));
       // GC only on the default stack.
       if (bottom < ff->real_stack && ff->real_stack < real_stack) {
         flags[i] = 0;
@@ -197,14 +198,15 @@ NOINLINE void FakeStack::GC(uptr real_stack) {
   needs_gc_ = false;
 }
 
-void FakeStack::ForEachFakeFrame(RangeIteratorCallback callback, void *arg) {
+void FakeStack::ForEachFakeFrame(RangeIteratorCallback callback, void* arg) {
   for (uptr class_id = 0; class_id < kNumberOfSizeClasses; class_id++) {
-    u8 *flags = GetFlags(stack_size_log(), class_id);
+    u8* flags = GetFlags(stack_size_log(), class_id);
     for (uptr i = 0, n = NumberOfFrames(stack_size_log(), class_id); i < n;
          i++) {
-      if (flags[i] == 0) continue;  // not allocated.
-      FakeFrame *ff = reinterpret_cast<FakeFrame *>(
-          GetFrame(stack_size_log(), class_id, i));
+      if (flags[i] == 0)
+        continue;  // not allocated.
+      FakeFrame* ff =
+          reinterpret_cast<FakeFrame*>(GetFrame(stack_size_log(), class_id, i));
       uptr begin = reinterpret_cast<uptr>(ff);
       callback(begin, begin + FakeStack::BytesInSizeClass(class_id), arg);
     }
@@ -212,44 +214,51 @@ void FakeStack::ForEachFakeFrame(RangeIteratorCallback callback, void *arg) {
 }
 
 #if (SANITIZER_LINUX && !SANITIZER_ANDROID) || SANITIZER_FUCHSIA
-static THREADLOCAL FakeStack *fake_stack_tls;
+static THREADLOCAL FakeStack* fake_stack_tls;
 
-FakeStack *GetTLSFakeStack() {
-  return fake_stack_tls;
-}
-void SetTLSFakeStack(FakeStack *fs) {
-  fake_stack_tls = fs;
-}
+static FakeStack* GetTLSFakeStack() { return fake_stack_tls; }
+static void SetTLSFakeStack(FakeStack* fs) { fake_stack_tls = fs; }
+void ResetTLSFakeStack() { fake_stack_tls = nullptr; }
 #else
-FakeStack *GetTLSFakeStack() { return 0; }
-void SetTLSFakeStack(FakeStack *fs) { }
+static FakeStack* GetTLSFakeStack() { return nullptr; }
+static void SetTLSFakeStack(FakeStack*) {}
+void ResetTLSFakeStack() {}
 #endif  // (SANITIZER_LINUX && !SANITIZER_ANDROID) || SANITIZER_FUCHSIA
 
-static FakeStack *GetFakeStack() {
-  AsanThread *t = GetCurrentThread();
-  if (!t) return nullptr;
+static FakeStack* GetFakeStack() {
+  AsanThread* t = GetCurrentThread();
+  if (!t)
+    return nullptr;
   return t->get_or_create_fake_stack();
 }
 
-static FakeStack *GetFakeStackFast() {
-  if (FakeStack *fs = GetTLSFakeStack())
+static FakeStack* GetFakeStackFast() {
+  FakeStack* fs = GetTLSFakeStack();
+  if (LIKELY(fs))
     return fs;
   if (!__asan_option_detect_stack_use_after_return)
     return nullptr;
-  return GetFakeStack();
+  fs = GetFakeStack();
+  if (LIKELY(fs))
+    SetTLSFakeStack(fs);
+  return fs;
 }
 
-static FakeStack *GetFakeStackFastAlways() {
-  if (FakeStack *fs = GetTLSFakeStack())
+static FakeStack* GetFakeStackFastAlways() {
+  FakeStack* fs = GetTLSFakeStack();
+  if (LIKELY(fs))
     return fs;
-  return GetFakeStack();
+  fs = GetFakeStack();
+  if (LIKELY(fs))
+    SetTLSFakeStack(fs);
+  return fs;
 }
 
 static ALWAYS_INLINE uptr OnMalloc(uptr class_id, uptr size) {
-  FakeStack *fs = GetFakeStackFast();
+  FakeStack* fs = GetFakeStackFast();
   if (!fs)
     return 0;
-  FakeFrame *ff =
+  FakeFrame* ff =
       fs->Allocate(fs->stack_size_log(), class_id, GET_CURRENT_FRAME());
   if (!ff)
     return 0;  // Out of fake stack.
@@ -259,10 +268,10 @@ static ALWAYS_INLINE uptr OnMalloc(uptr class_id, uptr size) {
 }
 
 static ALWAYS_INLINE uptr OnMallocAlways(uptr class_id, uptr size) {
-  FakeStack *fs = GetFakeStackFastAlways();
+  FakeStack* fs = GetFakeStackFastAlways();
   if (!fs)
     return 0;
-  FakeFrame *ff =
+  FakeFrame* ff =
       fs->Allocate(fs->stack_size_log(), class_id, GET_CURRENT_FRAME());
   if (!ff)
     return 0;  // Out of fake stack.
@@ -276,17 +285,17 @@ static ALWAYS_INLINE void OnFree(uptr ptr, uptr class_id, uptr size) {
   SetShadow(ptr, size, class_id, kMagic8);
 }
 
-} // namespace __asan
+}  // namespace __asan
 
 // ---------------------- Interface ---------------- {{{1
 using namespace __asan;
 #define DEFINE_STACK_MALLOC_FREE_WITH_CLASS_ID(class_id)                      \
   extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr                               \
-      __asan_stack_malloc_##class_id(uptr size) {                             \
+  __asan_stack_malloc_##class_id(uptr size) {                                 \
     return OnMalloc(class_id, size);                                          \
   }                                                                           \
   extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr                               \
-      __asan_stack_malloc_always_##class_id(uptr size) {                      \
+  __asan_stack_malloc_always_##class_id(uptr size) {                          \
     return OnMallocAlways(class_id, size);                                    \
   }                                                                           \
   extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __asan_stack_free_##class_id( \
@@ -311,21 +320,25 @@ extern "C" {
 // -asan-use-after-return=never, after modal UAR flag lands
 // (https://github.com/google/sanitizers/issues/1394)
 SANITIZER_INTERFACE_ATTRIBUTE
-void *__asan_get_current_fake_stack() { return GetFakeStackFast(); }
+void* __asan_get_current_fake_stack() { return GetFakeStackFast(); }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void *__asan_addr_is_in_fake_stack(void *fake_stack, void *addr, void **beg,
-                                   void **end) {
-  FakeStack *fs = reinterpret_cast<FakeStack*>(fake_stack);
-  if (!fs) return nullptr;
+void* __asan_addr_is_in_fake_stack(void* fake_stack, void* addr, void** beg,
+                                   void** end) {
+  FakeStack* fs = reinterpret_cast<FakeStack*>(fake_stack);
+  if (!fs)
+    return nullptr;
   uptr frame_beg, frame_end;
-  FakeFrame *frame = reinterpret_cast<FakeFrame *>(fs->AddrIsInFakeStack(
+  FakeFrame* frame = reinterpret_cast<FakeFrame*>(fs->AddrIsInFakeStack(
       reinterpret_cast<uptr>(addr), &frame_beg, &frame_end));
-  if (!frame) return nullptr;
+  if (!frame)
+    return nullptr;
   if (frame->magic != kCurrentStackFrameMagic)
     return nullptr;
-  if (beg) *beg = reinterpret_cast<void*>(frame_beg);
-  if (end) *end = reinterpret_cast<void*>(frame_end);
+  if (beg)
+    *beg = reinterpret_cast<void*>(frame_beg);
+  if (end)
+    *end = reinterpret_cast<void*>(frame_end);
   return reinterpret_cast<void*>(frame->real_stack);
 }
 
@@ -344,9 +357,9 @@ void __asan_alloca_poison(uptr addr, uptr size) {
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __asan_allocas_unpoison(uptr top, uptr bottom) {
-  if ((!top) || (top > bottom)) return;
-  REAL(memset)
-  (reinterpret_cast<void *>(MemToShadow(top)), 0,
-   (bottom - top) / ASAN_SHADOW_GRANULARITY);
+  if ((!top) || (top > bottom))
+    return;
+  REAL(memset)(reinterpret_cast<void*>(MemToShadow(top)), 0,
+               (bottom - top) / ASAN_SHADOW_GRANULARITY);
 }
-} // extern "C"
+}  // extern "C"
diff --git a/compiler-rt/lib/asan/asan_fake_stack.h b/compiler-rt/lib/asan/asan_fake_stack.h
index 50706e6..593c137 100644
--- a/compiler-rt/lib/asan/asan_fake_stack.h
+++ b/compiler-rt/lib/asan/asan_fake_stack.h
@@ -195,8 +195,7 @@ class FakeStack {
   void *true_start;
 };
 
-FakeStack *GetTLSFakeStack();
-void SetTLSFakeStack(FakeStack *fs);
+void ResetTLSFakeStack();
 
 }  // namespace __asan
 
diff --git a/compiler-rt/lib/asan/asan_thread.cpp b/compiler-rt/lib/asan/asan_thread.cpp
index 2627ae1..0ed58bb 100644
--- a/compiler-rt/lib/asan/asan_thread.cpp
+++ b/compiler-rt/lib/asan/asan_thread.cpp
@@ -163,7 +163,7 @@ void AsanThread::StartSwitchFiber(FakeStack **fake_stack_save, uptr bottom,
   if (fake_stack_save)
     *fake_stack_save = fake_stack_;
   fake_stack_ = nullptr;
-  SetTLSFakeStack(nullptr);
+  ResetTLSFakeStack();
   // if fake_stack_save is null, the fiber will die, delete the fakestack
   if (!fake_stack_save && current_fake_stack)
     current_fake_stack->Destroy(this->tid());
@@ -177,8 +177,8 @@ void AsanThread::FinishSwitchFiber(FakeStack *fake_stack_save, uptr *bottom_old,
   }
 
   if (fake_stack_save) {
-    SetTLSFakeStack(fake_stack_save);
     fake_stack_ = fake_stack_save;
+    ResetTLSFakeStack();
   }
 
   if (bottom_old)
@@ -242,7 +242,7 @@ FakeStack *AsanThread::AsyncSignalSafeLazyInitFakeStack() {
         Max(stack_size_log, static_cast<uptr>(flags()->min_uar_stack_size_log));
     fake_stack_ = FakeStack::Create(stack_size_log);
     DCHECK_EQ(GetCurrentThread(), this);
-    SetTLSFakeStack(fake_stack_);
+    ResetTLSFakeStack();
     return fake_stack_;
   }
   return nullptr;
diff --git a/compiler-rt/lib/asan/asan_thread.h b/compiler-rt/lib/asan/asan_thread.h
index 12f0cc7..19b7f34 100644
--- a/compiler-rt/lib/asan/asan_thread.h
+++ b/compiler-rt/lib/asan/asan_thread.h
@@ -104,7 +104,7 @@ class AsanThread {
     if (!fake_stack_) return;
     FakeStack *t = fake_stack_;
     fake_stack_ = nullptr;
-    SetTLSFakeStack(nullptr);
+    ResetTLSFakeStack();
     t->Destroy(tid);
   }
 
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index a40675c..d91e13c 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -520,6 +520,13 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
       *Subtype = INTEL_COREI7_PANTHERLAKE;
       break;
 
+    // Wildcatlake:
+    case 0xd5:
+      CPU = "wildcatlake";
+      *Type = INTEL_COREI7;
+      *Subtype = INTEL_COREI7_PANTHERLAKE;
+      break;
+
     // Icelake Xeon:
     case 0x6a:
     case 0x6c:
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index d8d0956..20a0919 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1289,16 +1289,7 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
     const std::optional<ActualArgument> &, const std::string &procName,
     const std::string &argName);
 
-inline bool IsCUDADeviceSymbol(const Symbol &sym) {
-  if (const auto *details =
-          sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
-    if (details->cudaDataAttr() &&
-        *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
-      return true;
-    }
-  }
-  return false;
-}
+bool IsCUDADeviceSymbol(const Symbol &sym);
 
 inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) {
   if (const auto *details =
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index f204eef..1de5e6b 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -111,6 +111,7 @@ ENUM_CLASS(KindCode, none, defaultIntegerKind,
     atomicIntKind, // atomic_int_kind from iso_fortran_env
     atomicIntOrLogicalKind, // atomic_int_kind or atomic_logical_kind
     sameAtom, // same type and kind as atom
+    extensibleOrUnlimitedType, // extensible or unlimited polymorphic type
 )
 
 struct TypePattern {
@@ -160,7 +161,8 @@ static constexpr TypePattern AnyChar{CharType, KindCode::any};
 static constexpr TypePattern AnyLogical{LogicalType, KindCode::any};
 static constexpr TypePattern AnyRelatable{RelatableType, KindCode::any};
 static constexpr TypePattern AnyIntrinsic{IntrinsicType, KindCode::any};
-static constexpr TypePattern ExtensibleDerived{DerivedType, KindCode::any};
+static constexpr TypePattern ExtensibleDerived{
+    DerivedType, KindCode::extensibleOrUnlimitedType};
 static constexpr TypePattern AnyData{AnyType, KindCode::any};
 
 // Type is irrelevant, but not BOZ (for PRESENT(), OPTIONAL(), &c.)
@@ -2103,9 +2105,13 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
       }
       return std::nullopt;
     } else if (!d.typePattern.categorySet.test(type->category())) {
+      const char *expected{
+          d.typePattern.kindCode == KindCode::extensibleOrUnlimitedType
+              ? ", expected extensible or unlimited polymorphic type"
+              : ""};
       messages.Say(arg->sourceLocation(),
-          "Actual argument for '%s=' has bad type '%s'"_err_en_US, d.keyword,
-          type->AsFortran());
+          "Actual argument for '%s=' has bad type '%s'%s"_err_en_US, d.keyword,
+          type->AsFortran(), expected);
       return std::nullopt; // argument has invalid type category
     }
     bool argOk{false};
@@ -2244,6 +2250,17 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
         return std::nullopt;
       }
       break;
+    case KindCode::extensibleOrUnlimitedType:
+      argOk = type->IsUnlimitedPolymorphic() ||
+          (type->category() == TypeCategory::Derived &&
+              IsExtensibleType(GetDerivedTypeSpec(type)));
+      if (!argOk) {
+        messages.Say(arg->sourceLocation(),
+            "Actual argument for '%s=' has type '%s', but was expected to be an extensible or unlimited polymorphic type"_err_en_US,
+            d.keyword, type->AsFortran());
+        return std::nullopt;
+      }
+      break;
     default:
       CRASH_NO_CASE;
     }
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp
index b927fa3..bd06acc 100644
--- a/flang/lib/Evaluate/tools.cpp
+++ b/flang/lib/Evaluate/tools.cpp
@@ -1153,6 +1153,18 @@ bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
   return (hasConstant || (hostSymbols.size() > 0)) && deviceSymbols.size() > 0;
 }
 
+bool IsCUDADeviceSymbol(const Symbol &sym) {
+  if (const auto *details =
+          sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
+    return details->cudaDataAttr() &&
+        *details->cudaDataAttr() != common::CUDADataAttr::Pinned;
+  } else if (const auto *details =
+                 sym.GetUltimate().detailsIf<semantics::AssocEntityDetails>()) {
+    return GetNbOfCUDADeviceSymbols(details->expr()) > 0;
+  }
+  return false;
+}
+
 // HasVectorSubscript()
 struct HasVectorSubscriptHelper
     : public AnyTraverse<HasVectorSubscriptHelper, bool,
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 53239cb..e7a6c4d 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -629,6 +629,10 @@ private:
     unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
     fir::ExtendedValue exv = isSource ? sourceExv : moldExv;
 
+    if (const Fortran::semantics::Symbol *sym{GetLastSymbol(sourceExpr)})
+      if (Fortran::semantics::IsCUDADevice(*sym))
+        TODO(loc, "CUDA Fortran: allocate with device source");
+
     // Generate a sequence of runtime calls.
     errorManager.genStatCheck(builder, loc);
     genAllocateObjectInit(box, allocatorIdx);
@@ -767,6 +771,15 @@ private:
                               const fir::MutableBoxValue &box,
                               ErrorManager &errorManager,
                               const Fortran::semantics::Symbol &sym) {
+
+    if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
+      if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec =
+              declTypeSpec->AsDerived())
+        if (derivedTypeSpec->HasDefaultInitialization(
+                /*ignoreAllocatable=*/true, /*ignorePointer=*/true))
+          TODO(loc,
+               "CUDA Fortran: allocate on device with default initialization");
+
     Fortran::lower::StatementContext stmtCtx;
     cuf::DataAttributeAttr cudaAttr =
         Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 260e525..2bbd803 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -40,6 +40,7 @@
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -128,6 +129,17 @@ class MapInfoFinalizationPass
     }
   }
 
+  /// Return true if the module has an OpenMP requires clause that includes
+  /// unified_shared_memory.
+  static bool moduleRequiresUSM(mlir::ModuleOp module) {
+    assert(module && "invalid module");
+    if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>(
+            "omp.requires"))
+      return mlir::omp::bitEnumContainsAll(
+          req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory);
+    return false;
+  }
+
   /// Create the member map for coordRef and append it (and its index
   /// path) to the provided new* vectors, if it is not already present.
   void appendMemberMapIfNew(
@@ -425,8 +437,12 @@ class MapInfoFinalizationPass
 
     mapFlags flags = mapFlags::OMP_MAP_TO |
                      (mapFlags(mapTypeFlag) &
-                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE |
-                       mapFlags::OMP_MAP_ALWAYS));
+                      (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+    // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
+    // to ensure device-local placement where required by tests relying on USM +
+    // close semantics.
+    if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>()))
+      flags |= mapFlags::OMP_MAP_CLOSE;
     return llvm::to_underlying(flags);
   }
 
@@ -518,6 +534,75 @@ class MapInfoFinalizationPass
     return newMapInfoOp;
   }
 
+  // Expand mappings of type(C_PTR) to map their `__address` field explicitly
+  // as a single pointer-sized member (USM-gated at callsite). This helps in
+  // USM scenarios to ensure the pointer-sized mapping is used.
+  mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
+                                        fir::FirOpBuilder &builder) {
+    if (!op.getMembers().empty())
+      return op;
+
+    mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
+    if (!mlir::isa<fir::RecordType>(varTy))
+      return op;
+    auto recTy = mlir::cast<fir::RecordType>(varTy);
+    // If not a builtin C_PTR record, skip.
+    if (!recTy.getName().ends_with("__builtin_c_ptr"))
+      return op;
+
+    // Find the index of the c_ptr address component named "__address".
+    int32_t fieldIdx = recTy.getFieldIndex("__address");
+    if (fieldIdx < 0)
+      return op;
+
+    mlir::Location loc = op.getVarPtr().getLoc();
+    mlir::Type memTy = recTy.getType(fieldIdx);
+    fir::IntOrValue idxConst =
+        mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
+    mlir::Value coord = fir::CoordinateOp::create(
+        builder, loc, builder.getRefType(memTy), op.getVarPtr(),
+        llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+
+    // Child for the `__address` member.
+    llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
+    mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
+    // Force CLOSE in USM paths so the pointer gets device-local placement
+    // when required by tests relying on USM + close semantics.
+    uint64_t mapTypeVal =
+        op.getMapType() |
+        llvm::to_underlying(
+            llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
+    mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
+        builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+
+    mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
+        builder, loc, coord.getType(), coord,
+        mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
+        builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+            mlir::omp::VariableCaptureKind::ByRef),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{},
+        /*member_index=*/mlir::ArrayAttr{},
+        /*bounds=*/op.getBounds(),
+        /*mapperId=*/mlir::FlatSymbolRefAttr(),
+        /*name=*/op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+
+    // Rebuild the parent as a container with the `__address` member.
+    mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
+        builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
+        op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
+        /*varPtrPtr=*/mlir::Value{},
+        /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
+        /*member_index=*/newMembersAttr,
+        /*bounds=*/llvm::SmallVector<mlir::Value>{},
+        /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+        /*partial_map=*/builder.getBoolAttr(false));
+    op.replaceAllUsesWith(newParent.getResult());
+    op->erase();
+    return newParent;
+  }
+
   mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
                                                fir::FirOpBuilder &builder,
                                                mlir::Operation *target) {
@@ -1169,6 +1254,17 @@ class MapInfoFinalizationPass
         genBoxcharMemberMap(op, builder);
       });
 
+      // Expand type(C_PTR) only when unified_shared_memory is required,
+      // to ensure device-visible pointer size/behavior in USM scenarios
+      // without changing default expectations elsewhere.
+      func->walk([&](mlir::omp::MapInfoOp op) {
+        // Only expand C_PTR members when unified_shared_memory is required.
+        if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>()))
+          return;
+        builder.setInsertionPoint(op);
+        genCptrMemberMap(op, builder);
+      });
+
       func->walk([&](mlir::omp::MapInfoOp op) {
         // TODO: Currently only supports a single user for the MapInfoOp. This
         // is fine for the moment, as the Fortran frontend will generate a
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index ea5e2c0..31e246c 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -3622,6 +3622,7 @@ void CheckHelper::CheckDioDtvArg(const Symbol &proc, const Symbol &subp,
                 ioKind == common::DefinedIo::ReadUnformatted
             ? Attr::INTENT_INOUT
             : Attr::INTENT_IN);
+    CheckDioDummyIsScalar(subp, *arg);
   }
 }
 
@@ -3687,6 +3688,7 @@ void CheckHelper::CheckDioAssumedLenCharacterArg(const Symbol &subp,
           "Dummy argument '%s' of a defined input/output procedure must be assumed-length CHARACTER of default kind"_err_en_US,
           arg->name());
     }
+    CheckDioDummyIsScalar(subp, *arg);
   }
 }
 
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index 351af5c..515121a 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -519,8 +519,8 @@ private:
 ///   function references with scalar data pointer result of non-character
 ///   intrinsic type or variables that are non-polymorphic scalar pointers
 ///   and any length type parameter must be constant.
-void OmpStructureChecker::CheckAtomicType(
-    SymbolRef sym, parser::CharBlock source, std::string_view name) {
+void OmpStructureChecker::CheckAtomicType(SymbolRef sym,
+    parser::CharBlock source, std::string_view name, bool checkTypeOnPointer) {
   const DeclTypeSpec *typeSpec{sym->GetType()};
   if (!typeSpec) {
     return;
@@ -547,6 +547,22 @@ void OmpStructureChecker::CheckAtomicType(
     return;
   }
 
+  // Apply pointer-to-non-intrinsic rule only for intrinsic-assignment paths.
+  if (checkTypeOnPointer) {
+    using Category = DeclTypeSpec::Category;
+    Category cat{typeSpec->category()};
+    if (cat != Category::Numeric && cat != Category::Logical) {
+      std::string details = " has the POINTER attribute";
+      if (const auto *derived{typeSpec->AsDerived()}) {
+        details += " and derived type '"s + derived->name().ToString() + "'";
+      }
+      context_.Say(source,
+          "ATOMIC operation requires an intrinsic scalar variable; '%s'%s"_err_en_US,
+          sym->name(), details);
+      return;
+    }
+  }
+
   // Go over all length parameters, if any, and check if they are
   // explicit.
   if (const DerivedTypeSpec *derived{typeSpec->AsDerived()}) {
@@ -562,7 +578,7 @@ void OmpStructureChecker::CheckAtomicType(
 }
 
 void OmpStructureChecker::CheckAtomicVariable(
-    const SomeExpr &atom, parser::CharBlock source) {
+    const SomeExpr &atom, parser::CharBlock source, bool checkTypeOnPointer) {
   if (atom.Rank() != 0) {
     context_.Say(source, "Atomic variable %s should be a scalar"_err_en_US,
         atom.AsFortran());
@@ -572,7 +588,7 @@ void OmpStructureChecker::CheckAtomicVariable(
   assert(dsgs.size() == 1 && "Should have a single top-level designator");
   evaluate::SymbolVector syms{evaluate::GetSymbolVector(dsgs.front())};
 
-  CheckAtomicType(syms.back(), source, atom.AsFortran());
+  CheckAtomicType(syms.back(), source, atom.AsFortran(), checkTypeOnPointer);
 
   if (IsAllocatable(syms.back()) && !IsArrayElement(atom)) {
     context_.Say(source, "Atomic variable %s cannot be ALLOCATABLE"_err_en_US,
@@ -789,7 +805,8 @@ void OmpStructureChecker::CheckAtomicCaptureAssignment(
   if (!IsVarOrFunctionRef(atom)) {
     ErrorShouldBeVariable(atom, rsrc);
   } else {
-    CheckAtomicVariable(atom, rsrc);
+    CheckAtomicVariable(
+        atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(capture));
     // This part should have been checked prior to calling this function.
     assert(*GetConvertInput(capture.rhs) == atom &&
         "This cannot be a capture assignment");
@@ -808,7 +825,8 @@ void OmpStructureChecker::CheckAtomicReadAssignment(
     if (!IsVarOrFunctionRef(atom)) {
       ErrorShouldBeVariable(atom, rsrc);
     } else {
-      CheckAtomicVariable(atom, rsrc);
+      CheckAtomicVariable(
+          atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(read));
       CheckStorageOverlap(atom, {read.lhs}, source);
     }
   } else {
@@ -829,7 +847,8 @@ void OmpStructureChecker::CheckAtomicWriteAssignment(
   if (!IsVarOrFunctionRef(atom)) {
     ErrorShouldBeVariable(atom, rsrc);
   } else {
-    CheckAtomicVariable(atom, lsrc);
+    CheckAtomicVariable(
+        atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(write));
     CheckStorageOverlap(atom, {write.rhs}, source);
   }
 }
@@ -854,7 +873,8 @@ OmpStructureChecker::CheckAtomicUpdateAssignment(
     return std::nullopt;
   }
 
-  CheckAtomicVariable(atom, lsrc);
+  CheckAtomicVariable(
+      atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(update));
 
   auto [hasErrors, tryReassoc]{CheckAtomicUpdateAssignmentRhs(
       atom, update.rhs, source, /*suppressDiagnostics=*/true)};
@@ -1017,7 +1037,8 @@ void OmpStructureChecker::CheckAtomicConditionalUpdateAssignment(
     return;
   }
 
-  CheckAtomicVariable(atom, alsrc);
+  CheckAtomicVariable(
+      atom, alsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(assign));
 
   auto top{GetTopLevelOperationIgnoreResizing(cond)};
   // Missing arguments to operations would have been diagnosed by now.
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index f507278..543642ff 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -262,10 +262,10 @@ private:
   void CheckStorageOverlap(const evaluate::Expr<evaluate::SomeType> &,
       llvm::ArrayRef<evaluate::Expr<evaluate::SomeType>>, parser::CharBlock);
   void ErrorShouldBeVariable(const MaybeExpr &expr, parser::CharBlock source);
-  void CheckAtomicType(
-      SymbolRef sym, parser::CharBlock source, std::string_view name);
-  void CheckAtomicVariable(
-      const evaluate::Expr<evaluate::SomeType> &, parser::CharBlock);
+  void CheckAtomicType(SymbolRef sym, parser::CharBlock source,
+      std::string_view name, bool checkTypeOnPointer = true);
+  void CheckAtomicVariable(const evaluate::Expr<evaluate::SomeType> &,
+      parser::CharBlock, bool checkTypeOnPointer = true);
   std::pair<const parser::ExecutionPartConstruct *,
       const parser::ExecutionPartConstruct *>
   CheckUpdateCapture(const parser::ExecutionPartConstruct *ec1,
diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf
new file mode 100644
index 0000000..f68a9aa
--- /dev/null
+++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf
@@ -0,0 +1,15 @@
+! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s
+
+program test
+implicit none
+
+type :: t1
+   real(4) :: x_fin(1:10) = acos(-1.0_4)
+end type t1
+
+type(t1), allocatable, device :: t(:)
+
+! CHECK: not yet implemented: CUDA Fortran: allocate on device with default initialization
+allocate(t(1:2))
+
+end program
diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf
new file mode 100644
index 0000000..3e59e2f
--- /dev/null
+++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf
@@ -0,0 +1,9 @@
+! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s
+
+program main
+  implicit none
+  integer, device, allocatable :: a_d(:)
+  integer, allocatable :: a(:)
+! CHECK: not yet implemented: CUDA Fortran: allocate with device source
+  allocate(a, source=a_d)
+end program
diff --git a/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf
new file mode 100644
index 0000000..af850d5
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf
@@ -0,0 +1,21 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Test detection of CUDA Fortran data transfer in presence of associuate
+! statement.
+
+module m
+  real(8), device, dimension(10,10,10) :: d
+end module m
+
+subroutine foo
+  use m
+  !@CUF associate(d1 => d)
+  d1 = 0.0
+  !@CUF end associate
+end subroutine
+
+! CHECK-LABEL: func.func @_QPfoo()
+! CHECK: %[[D:.*]] = fir.address_of(@_QMmEd) : !fir.ref<!fir.array<10x10x10xf64>>
+! CHECK: %[[D_DECL:.*]]:2 = hlfir.declare %[[D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmEd"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>)
+! CHECK: %[[D1_DECL:.*]]:2 = hlfir.declare %[[D_DECL]]#0(%4) {uniq_name = "_QFfooEd1"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>)
+! CHECK: cuf.data_transfer %{{.*}} to %[[D1_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : f64, !fir.ref<!fir.array<10x10x10xf64>>
diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
new file mode 100644
index 0000000..7fc30b4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
@@ -0,0 +1,21 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
+!
+! Checks:
+!  - C_PTR mappings expand to `__address` member with CLOSE under USM paths.
+!  - use_device_ptr does not implicitly expand member operands in the clause.
+
+subroutine only_cptr_use_device_ptr
+  use iso_c_binding
+  type(c_ptr) :: cptr
+  integer :: i
+
+  !$omp target data use_device_ptr(cptr) map(tofrom: i)
+  !$omp end target data
+end subroutine
+
+! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr()
+! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "i"}
+! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>
+! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref<i32>) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>) {
+! CHECK:   omp.terminator
+! CHECK: }
diff --git a/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90
new file mode 100644
index 0000000..6268b0b
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90
@@ -0,0 +1,8 @@
+! RUN: not %flang_fc1 -fopenmp -fsyntax-only %s 2>&1 | FileCheck %s
+type t
+end type
+type(t), pointer :: a1, a2
+!$omp atomic write
+a1 = a2
+! CHECK: error: ATOMIC operation requires an intrinsic scalar variable; 'a1' has the POINTER attribute and derived type 't'
+end
diff --git a/flang/test/Semantics/dynamic-type-intrinsics.f90 b/flang/test/Semantics/dynamic-type-intrinsics.f90
new file mode 100644
index 0000000..a4ce3db
--- /dev/null
+++ b/flang/test/Semantics/dynamic-type-intrinsics.f90
@@ -0,0 +1,73 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+module m
+    type :: t1
+      real :: x
+    end type
+    type :: t2(k)
+      integer, kind :: k
+      real(kind=k) :: x
+    end type
+    type :: t3
+      real :: x
+    end type
+    type, extends(t1) :: t4
+      integer :: y
+    end type
+    type :: t5
+      sequence
+      integer :: x
+      integer :: y
+    end type
+
+    integer :: i
+    real :: r
+    type(t1) :: x1, y1
+    type(t2(4)) :: x24, y24
+    type(t2(8)) :: x28
+    type(t3) :: x3
+    type(t4) :: x4
+    type(t5) :: x5
+    class(t1), allocatable :: a1
+    class(t3), allocatable :: a3
+
+    integer(kind=merge(kind(1),-1,same_type_as(x1, x1))) same_type_as_x1_x1_true
+    integer(kind=merge(kind(1),-1,same_type_as(x1, y1))) same_type_as_x1_y1_true
+    integer(kind=merge(kind(1),-1,same_type_as(x24, x24))) same_type_as_x24_x24_true
+    integer(kind=merge(kind(1),-1,same_type_as(x24, y24))) same_type_as_x24_y24_true
+    integer(kind=merge(kind(1),-1,same_type_as(x24, x28))) same_type_as_x24_x28_true
+    !ERROR: INTEGER(KIND=-1) is not a supported type
+    integer(kind=merge(kind(1),-1,same_type_as(x1, x3))) same_type_as_x1_x3_false
+    !ERROR: INTEGER(KIND=-1) is not a supported type
+    integer(kind=merge(kind(1),-1,same_type_as(a1, a3))) same_type_as_a1_a3_false
+    !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+    logical :: t1_8 = same_type_as(x5, x5)
+    !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+    logical :: t1_9 = same_type_as(x5, x1)
+    !ERROR: Actual argument for 'b=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+    logical :: t1_10 = same_type_as(x1, x5)
+    !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type
+    logical :: t1_11 = same_type_as(i, i)
+    !ERROR: Actual argument for 'a=' has bad type 'REAL(4)', expected extensible or unlimited polymorphic type
+    logical :: t1_12 = same_type_as(r, r)
+    !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type
+    logical :: t1_13 = same_type_as(i, t)
+
+    integer(kind=merge(kind(1),-1,extends_type_of(x1, y1))) extends_type_of_x1_y1_true
+    integer(kind=merge(kind(1),-1,extends_type_of(x24, x24))) extends_type_of_x24_x24_true
+    integer(kind=merge(kind(1),-1,extends_type_of(x24, y24))) extends_type_of_x24_y24_true
+    integer(kind=merge(kind(1),-1,extends_type_of(x24, x28))) extends_type_of_x24_x28_true
+    !ERROR: INTEGER(KIND=-1) is not a supported type
+    integer(kind=merge(kind(1),-1,extends_type_of(x1, x3))) extends_type_of_x1_x3_false
+    !ERROR: INTEGER(KIND=-1) is not a supported type
+    integer(kind=merge(kind(1),-1,extends_type_of(a1, a3))) extends_type_of_a1_a3_false
+    !ERROR: INTEGER(KIND=-1) is not a supported type
+    integer(kind=merge(kind(1),-1,extends_type_of(x1, x4))) extends_type_of_x1_x4_false
+    integer(kind=merge(kind(1),-1,extends_type_of(x4, x1))) extends_type_of_x4_x1_true
+    !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+    logical :: t2_9 = extends_type_of(x5, x5)
+    !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+    logical :: t2_10 = extends_type_of(x5, x1)
+    !ERROR: Actual argument for 'mold=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+    logical :: t2_11 = extends_type_of(x1, x5)
+end module
diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90
index c00deed..6bb7a71 100644
--- a/flang/test/Semantics/io11.f90
+++ b/flang/test/Semantics/io11.f90
@@ -809,3 +809,24 @@ module m29
     end
   end interface
 end
+
+module m30
+    type base
+        character(5), allocatable :: data
+    end type
+    interface write(formatted)
+        subroutine formattedRead (dtv, unit, iotype, v_list, iostat, iomsg)
+        import base
+            !ERROR: Dummy argument 'dtv' of a defined input/output procedure must be a scalar
+            class (base), intent(in) :: dtv(10)
+            integer, intent(in) :: unit
+            !ERROR: Dummy argument 'iotype' of a defined input/output procedure must be a scalar
+            character(*), intent(in) :: iotype(2)
+            integer, intent(in) :: v_list(:)
+            !ERROR: Dummy argument 'iostat' of a defined input/output procedure must be a scalar
+            integer, intent(out) :: iostat(*)
+            !ERROR: Dummy argument 'iomsg' of a defined input/output procedure must be a scalar
+            character(*), intent(inout) :: iomsg(:)
+        end subroutine
+    end interface
+end module
diff --git a/libc/config/baremetal/config.json b/libc/config/baremetal/config.json
index f01e508..ffb4fe6 100644
--- a/libc/config/baremetal/config.json
+++ b/libc/config/baremetal/config.json
@@ -36,7 +36,12 @@
   },
   "math": {
     "LIBC_CONF_MATH_OPTIMIZATIONS": {
-      "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES)"
+      "value": "(LIBC_MATH_SKIP_ACCURATE_PASS | LIBC_MATH_SMALL_TABLES | LIBC_MATH_INTERMEDIATE_COMP_IN_FLOAT)"
+    }
+  },
+  "general": {
+    "LIBC_ADD_NULL_CHECKS": {
+      "value": false
     }
   }
 }
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index afa90e6..81360aa 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -632,8 +632,9 @@ add_header_macro(
   sys/time.h
   DEPENDS
     .llvm_libc_common_h
-    .llvm-libc-types.struct_timeval
     .llvm-libc-macros.sys_time_macros
+    .llvm-libc-types.struct_itimerval
+    .llvm-libc-types.struct_timeval
 )
 
 add_header_macro(
diff --git a/libc/include/stdio.yaml b/libc/include/stdio.yaml
index 2a0c563..394437b 100644
--- a/libc/include/stdio.yaml
+++ b/libc/include/stdio.yaml
@@ -197,12 +197,26 @@ functions:
       - type: FILE *
       - type: long
       - type: int
+  - name: fseeko
+    standards:
+      - POSIX
+    return_type: int
+    arguments:
+      - type: FILE *
+      - type: off_t
+      - type: int
   - name: ftell
     standards:
       - stdc
     return_type: long
     arguments:
       - type: FILE *
+  - name: ftello
+    standards:
+      - POSIX
+    return_type: off_t
+    arguments:
+      - type: FILE *
   - name: funlockfile
     standards:
       - POSIX
diff --git a/libc/test/src/sys/mman/linux/CMakeLists.txt b/libc/test/src/sys/mman/linux/CMakeLists.txt
index a362c1c..32fee92 100644
--- a/libc/test/src/sys/mman/linux/CMakeLists.txt
+++ b/libc/test/src/sys/mman/linux/CMakeLists.txt
@@ -99,6 +99,7 @@ add_libc_unittest(
     libc.src.sys.mman.mincore
     libc.src.sys.mman.mlock
     libc.src.sys.mman.munlock
+    libc.src.unistd.sysconf
     libc.test.UnitTest.ErrnoCheckingTest
     libc.test.UnitTest.ErrnoSetterMatcher
 )
@@ -123,6 +124,7 @@ add_libc_unittest(
     libc.src.sys.mman.mlockall
     libc.src.sys.mman.munlockall
     libc.src.sys.resource.getrlimit
+    libc.src.unistd.sysconf
     libc.src.__support.OSUtil.osutil
     libc.test.UnitTest.ErrnoCheckingTest
     libc.test.UnitTest.ErrnoSetterMatcher
@@ -144,6 +146,7 @@ add_libc_unittest(
     libc.src.sys.mman.mincore
     libc.src.sys.mman.mlock
     libc.src.sys.mman.munlock
+    libc.src.unistd.sysconf
     libc.test.UnitTest.ErrnoCheckingTest
     libc.test.UnitTest.ErrnoSetterMatcher
 )
@@ -165,6 +168,7 @@ add_libc_unittest(
     libc.src.sys.mman.munmap
     libc.src.fcntl.open
     libc.src.unistd.close
+    libc.src.unistd.sysconf
 )
 
 add_libc_unittest(
diff --git a/libc/test/src/sys/mman/linux/mincore_test.cpp b/libc/test/src/sys/mman/linux/mincore_test.cpp
index fb86252..1806bb1 100644
--- a/libc/test/src/sys/mman/linux/mincore_test.cpp
+++ b/libc/test/src/sys/mman/linux/mincore_test.cpp
@@ -12,6 +12,7 @@
 #include "src/sys/mman/mmap.h"
 #include "src/sys/mman/munlock.h"
 #include "src/sys/mman/munmap.h"
+#include "src/unistd/sysconf.h"
 #include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
@@ -20,8 +21,7 @@ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
 using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
 using LlvmLibcMincoreTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
 
-// TODO: Replace with sysconf call once the function is properly implemented.
-constexpr size_t PAGE_SIZE = 4096;
+const size_t PAGE_SIZE = LIBC_NAMESPACE::sysconf(_SC_PAGESIZE);
 
 TEST_F(LlvmLibcMincoreTest, UnMappedMemory) {
   unsigned char vec;
diff --git a/libc/test/src/sys/mman/linux/mlock_test.cpp b/libc/test/src/sys/mman/linux/mlock_test.cpp
index f4a072e..0056ccf 100644
--- a/libc/test/src/sys/mman/linux/mlock_test.cpp
+++ b/libc/test/src/sys/mman/linux/mlock_test.cpp
@@ -22,14 +22,14 @@
 #include "src/sys/mman/munlockall.h"
 #include "src/sys/mman/munmap.h"
 #include "src/sys/resource/getrlimit.h"
+#include "src/unistd/sysconf.h"
 #include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
 #include <sys/syscall.h>
 
-// TODO: Replace with sysconf call once the function is properly implemented.
-constexpr size_t PAGE_SIZE = 4096;
+const size_t PAGE_SIZE = LIBC_NAMESPACE::sysconf(_SC_PAGESIZE);
 
 using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher;
 using LlvmLibcMlockTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
diff --git a/libc/test/src/sys/mman/linux/msync_test.cpp b/libc/test/src/sys/mman/linux/msync_test.cpp
index 764a67d..bf9640d 100644
--- a/libc/test/src/sys/mman/linux/msync_test.cpp
+++ b/libc/test/src/sys/mman/linux/msync_test.cpp
@@ -11,12 +11,12 @@
 #include "src/sys/mman/msync.h"
 #include "src/sys/mman/munlock.h"
 #include "src/sys/mman/munmap.h"
+#include "src/unistd/sysconf.h"
 #include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-// TODO: Replace with sysconf call once the function is properly implemented.
-constexpr size_t PAGE_SIZE = 4096;
+const size_t PAGE_SIZE = LIBC_NAMESPACE::sysconf(_SC_PAGESIZE);
 
 using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher;
 using LlvmLibcMsyncTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
diff --git a/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp b/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp
index 094bcb2..08ffffe 100644
--- a/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp
+++ b/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp
@@ -11,6 +11,7 @@
 #include "src/sys/mman/munmap.h"
 #include "src/sys/mman/remap_file_pages.h"
 #include "src/unistd/close.h"
+#include "src/unistd/sysconf.h"
 #include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
@@ -18,8 +19,7 @@
 #include <sys/mman.h>
 #include <sys/stat.h> // For S_IRWXU
 
-// TODO: Replace with sysconf call once the function is properly implemented.
-constexpr size_t PAGE_SIZE = 4096;
+const size_t PAGE_SIZE = LIBC_NAMESPACE::sysconf(_SC_PAGESIZE);
 
 using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
 using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index ba0fc4b..088edc0 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -124,7 +124,7 @@ if( EXISTS ${LIBCLC_CUSTOM_LLVM_TOOLS_BINARY_DIR} )
 endif()
 
 foreach( tool IN ITEMS clang opt llvm-as llvm-link )
-  if( NOT EXISTS "${${tool}_exe}" AND "${tool}_target" STREQUAL "" )
+  if( NOT EXISTS "${${tool}_exe}" AND "${${tool}_target}" STREQUAL "" )
     message( FATAL_ERROR "libclc toolchain incomplete - missing tool ${tool}!" )
   endif()
 endforeach()
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 1a450218..5e8fe2e 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -68,6 +68,9 @@ Improvements and New Features
   reduced debug information.
 
 - The performance of ``std::find`` has been improved by up to 2x for integral types
+- The ``std::distance`` and ``std::ranges::distance`` algorithms have been optimized for segmented iterators (e.g.,
+  ``std::join_view`` iterators), reducing the complexity from ``O(n)`` to ``O(n / segment_size)``. Benchmarks show
+  performance improvements of over 1600x in favorable cases with large segment sizes (e.g., 1024).
 
 Deprecations and Removals
 -------------------------
diff --git a/libcxx/include/__iterator/distance.h b/libcxx/include/__iterator/distance.h
index 1732aa5..9be9db0 100644
--- a/libcxx/include/__iterator/distance.h
+++ b/libcxx/include/__iterator/distance.h
@@ -10,41 +10,71 @@
 #ifndef _LIBCPP___ITERATOR_DISTANCE_H
 #define _LIBCPP___ITERATOR_DISTANCE_H
 
+#include <__algorithm/for_each_segment.h>
 #include <__config>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/size.h>
 #include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
 #include <__type_traits/remove_cvref.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _InputIter>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
-__distance(_InputIter __first, _InputIter __last, input_iterator_tag) {
-  typename iterator_traits<_InputIter>::difference_type __r(0);
+#if _LIBCPP_STD_VER >= 20
+template <class _Iter>
+using __iter_distance_t _LIBCPP_NODEBUG = std::iter_difference_t<_Iter>;
+#else
+template <class _Iter>
+using __iter_distance_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type;
+#endif
+
+template <class _InputIter, class _Sent>
+inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_InputIter> __distance(_InputIter __first, _Sent __last) {
+  __iter_distance_t<_InputIter> __r(0);
   for (; __first != __last; ++__first)
     ++__r;
   return __r;
 }
 
-template <class _RandIter>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_RandIter>::difference_type
-__distance(_RandIter __first, _RandIter __last, random_access_iterator_tag) {
+template <class _RandIter, __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_RandIter>
+__distance(_RandIter __first, _RandIter __last) {
   return __last - __first;
 }
 
+#if _LIBCPP_STD_VER >= 20
+template <class _SegmentedIter,
+          __enable_if_t<!__has_random_access_iterator_category<_SegmentedIter>::value &&
+                            __is_segmented_iterator_v<_SegmentedIter>,
+                        int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_SegmentedIter>
+__distance(_SegmentedIter __first, _SegmentedIter __last) {
+  __iter_distance_t<_SegmentedIter> __r(0);
+  std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) {
+    __r += std::__distance(__lfirst, __llast);
+  });
+  return __r;
+}
+#endif // _LIBCPP_STD_VER >= 20
+
 template <class _InputIter>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
 distance(_InputIter __first, _InputIter __last) {
-  return std::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category());
+  return std::__distance(__first, __last);
 }
 
 #if _LIBCPP_STD_VER >= 20
@@ -56,12 +86,7 @@ struct __distance {
   template <class _Ip, sentinel_for<_Ip> _Sp>
     requires(!sized_sentinel_for<_Sp, _Ip>)
   _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const {
-    iter_difference_t<_Ip> __n = 0;
-    while (__first != __last) {
-      ++__first;
-      ++__n;
-    }
-    return __n;
+    return std::__distance(std::move(__first), std::move(__last));
   }
 
   template <class _Ip, sized_sentinel_for<decay_t<_Ip>> _Sp>
@@ -92,4 +117,6 @@ inline constexpr auto distance = __distance{};
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ITERATOR_DISTANCE_H
diff --git a/libcxx/test/benchmarks/iterators/distance.bench.cpp b/libcxx/test/benchmarks/iterators/distance.bench.cpp
new file mode 100644
index 0000000..186ef79
--- /dev/null
+++ b/libcxx/test/benchmarks/iterators/distance.bench.cpp
@@ -0,0 +1,84 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cstddef>
+#include <deque>
+#include <iterator>
+#include <ranges>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+int main(int argc, char** argv) {
+  auto std_distance = [](auto first, auto last) { return std::distance(first, last); };
+
+  // {std,ranges}::distance(std::deque)
+  {
+    auto bm = [](std::string name, auto distance) {
+      benchmark::RegisterBenchmark(
+          name,
+          [distance](auto& st) {
+            std::size_t const size = st.range(0);
+            std::deque<int> c(size, 1);
+
+            for ([[maybe_unused]] auto _ : st) {
+              benchmark::DoNotOptimize(c);
+              auto result = distance(c.begin(), c.end());
+              benchmark::DoNotOptimize(result);
+            }
+          })
+          ->Arg(50) // non power-of-two
+          ->Arg(1024)
+          ->Arg(4096)
+          ->Arg(8192);
+    };
+    bm.operator()("std::distance(deque<int>)", std_distance);
+    bm.operator()("rng::distance(deque<int>)", std::ranges::distance);
+  }
+
+  // {std,ranges}::distance(std::join_view)
+  {
+    auto bm = []<class Container>(std::string name, auto distance, std::size_t seg_size) {
+      benchmark::RegisterBenchmark(
+          name,
+          [distance, seg_size](auto& st) {
+            std::size_t const size     = st.range(0);
+            std::size_t const segments = (size + seg_size - 1) / seg_size;
+            Container c(segments);
+            for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+              c[i].resize(std::min(seg_size, n));
+            }
+
+            auto view  = c | std::views::join;
+            auto first = view.begin();
+            auto last  = view.end();
+
+            for ([[maybe_unused]] auto _ : st) {
+              benchmark::DoNotOptimize(c);
+              auto result = distance(first, last);
+              benchmark::DoNotOptimize(result);
+            }
+          })
+          ->Arg(50) // non power-of-two
+          ->Arg(1024)
+          ->Arg(4096)
+          ->Arg(8192);
+    };
+    bm.operator()<std::vector<std::vector<int>>>("std::distance(join_view(vector<vector<int>>))", std_distance, 256);
+    bm.operator()<std::vector<std::vector<int>>>(
+        "rng::distance(join_view(vector<vector<int>>)", std::ranges::distance, 256);
+  }
+
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
index 13caeff..d92a44f 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
@@ -16,38 +16,73 @@
 //   Iter::difference_type
 //   distance(Iter first, Iter last); // constexpr in C++17
 
-#include <iterator>
+#include <array>
 #include <cassert>
+#include <deque>
+#include <iterator>
+#include <vector>
 #include <type_traits>
 
 #include "test_macros.h"
 #include "test_iterators.h"
 
 template <class It>
-TEST_CONSTEXPR_CXX17
-void check_distance(It first, It last, typename std::iterator_traits<It>::difference_type dist)
-{
-    typedef typename std::iterator_traits<It>::difference_type Difference;
-    static_assert(std::is_same<decltype(std::distance(first, last)), Difference>::value, "");
-    assert(std::distance(first, last) == dist);
+TEST_CONSTEXPR_CXX17 void check_distance(It first, It last, typename std::iterator_traits<It>::difference_type dist) {
+  typedef typename std::iterator_traits<It>::difference_type Difference;
+  static_assert(std::is_same<decltype(std::distance(first, last)), Difference>::value, "");
+  assert(std::distance(first, last) == dist);
 }
 
-TEST_CONSTEXPR_CXX17 bool tests()
-{
-    const char* s = "1234567890";
-    check_distance(cpp17_input_iterator<const char*>(s), cpp17_input_iterator<const char*>(s+10), 10);
-    check_distance(forward_iterator<const char*>(s), forward_iterator<const char*>(s+10), 10);
-    check_distance(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+10), 10);
-    check_distance(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+10), 10);
-    check_distance(s, s+10, 10);
-    return true;
+#if TEST_STD_VER >= 20
+/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+  using Container = std::deque<std::deque<double>>;
+  Container c;
+  auto view                    = c | std::views::join;
+  Container::difference_type n = 0;
+  for (std::size_t i = 0; i < 10; ++i) {
+    n += i;
+    c.push_back(Container::value_type(i));
+  }
+  assert(std::distance(view.begin(), view.end()) == n);
+}
+#endif
+
+TEST_CONSTEXPR_CXX17 bool tests() {
+  const char* s = "1234567890";
+  check_distance(cpp17_input_iterator<const char*>(s), cpp17_input_iterator<const char*>(s + 10), 10);
+  check_distance(forward_iterator<const char*>(s), forward_iterator<const char*>(s + 10), 10);
+  check_distance(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s + 10), 10);
+  check_distance(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s + 10), 10);
+  check_distance(s, s + 10, 10);
+
+#if TEST_STD_VER >= 20
+  {
+    using Container = std::vector<std::vector<int>>;
+    Container c;
+    auto view                    = c | std::views::join;
+    Container::difference_type n = 0;
+    for (std::size_t i = 0; i < 10; ++i) {
+      n += i;
+      c.push_back(Container::value_type(i));
+    }
+    assert(std::distance(view.begin(), view.end()) == n);
+  }
+  {
+    using Container = std::array<std::array<char, 3>, 10>;
+    Container c;
+    auto view = c | std::views::join;
+    assert(std::distance(view.begin(), view.end()) == 30);
+  }
+  if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+    test_deque();
+#endif
+  return true;
 }
 
-int main(int, char**)
-{
-    tests();
+int main(int, char**) {
+  tests();
 #if TEST_STD_VER >= 17
-    static_assert(tests(), "");
+  static_assert(tests(), "");
 #endif
-    return 0;
+  return 0;
 }
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.distance/iterator_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.distance/iterator_sentinel.pass.cpp
index b4199b7..1b78489 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.distance/iterator_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.distance/iterator_sentinel.pass.cpp
@@ -15,18 +15,21 @@
 // template<class I, sized_sentinel_for<decay_t<I>> S>
 //   constexpr iter_difference_t<I> ranges::distance(I&& first, S last); // TODO: update when LWG3664 is resolved
 
-#include <iterator>
+#include <array>
 #include <cassert>
+#include <deque>
+#include <iterator>
+#include <vector>
 
 #include "test_iterators.h"
 #include "test_macros.h"
 
-template<class It, class Sent>
+template <class It, class Sent>
 constexpr void test_unsized() {
   static_assert(std::sentinel_for<Sent, It> && !std::sized_sentinel_for<Sent, It>);
-  int a[3] = {1,2,3};
+  int a[3] = {1, 2, 3};
   {
-    It first = It(a);
+    It first  = It(a);
     auto last = Sent(It(a));
     assert(std::ranges::distance(first, last) == 0);
     assert(std::ranges::distance(It(a), last) == 0);
@@ -36,7 +39,7 @@ constexpr void test_unsized() {
   }
   {
     auto check = [&a]<class ItQual, class SentQual> {
-      It first = It(a);
+      It first  = It(a);
       Sent last = Sent(It(a + 3));
       assert(std::ranges::distance(static_cast<ItQual>(first), static_cast<SentQual>(last)) == 3);
     };
@@ -61,13 +64,13 @@ constexpr void test_unsized() {
   }
 }
 
-template<class It, class Sent>
+template <class It, class Sent>
 constexpr void test_sized() {
   static_assert(std::sized_sentinel_for<Sent, It>);
-  int a[] = {1,2,3};
+  int a[] = {1, 2, 3};
   {
     auto check = [&a]<class ItQual, class SentQual> {
-      It first = It(a + 3);
+      It first  = It(a + 3);
       Sent last = Sent(It(a));
       assert(std::ranges::distance(static_cast<ItQual>(first), static_cast<SentQual>(last)) == -3);
     };
@@ -91,7 +94,7 @@ constexpr void test_sized() {
     check.template operator()<const It&&, const Sent&&>();
   }
   {
-    It first = It(a);
+    It first  = It(a);
     auto last = Sent(It(a));
     assert(std::ranges::distance(first, last) == 0);
     assert(std::ranges::distance(It(a), last) == 0);
@@ -100,7 +103,7 @@ constexpr void test_sized() {
     ASSERT_SAME_TYPE(decltype(std::ranges::distance(It(a), Sent(It(a)))), std::iter_difference_t<It>);
   }
   {
-    It first = It(a);
+    It first  = It(a);
     auto last = Sent(It(a + 3));
     assert(std::ranges::distance(first, last) == 3);
     assert(std::ranges::distance(It(a), last) == 3);
@@ -110,13 +113,17 @@ constexpr void test_sized() {
 }
 
 struct StrideCounter {
-  int *it_;
-  int *inc_;
-  using value_type = int;
+  int* it_;
+  int* inc_;
+  using value_type      = int;
   using difference_type = int;
   explicit StrideCounter();
-  constexpr explicit StrideCounter(int *it, int *inc) : it_(it), inc_(inc) {}
-  constexpr auto& operator++() { ++it_; *inc_ += 1; return *this; }
+  constexpr explicit StrideCounter(int* it, int* inc) : it_(it), inc_(inc) {}
+  constexpr auto& operator++() {
+    ++it_;
+    *inc_ += 1;
+    return *this;
+  }
   StrideCounter operator++(int);
   int& operator*() const;
   bool operator==(StrideCounter) const;
@@ -125,11 +132,11 @@ static_assert(std::forward_iterator<StrideCounter>);
 static_assert(!std::sized_sentinel_for<StrideCounter, StrideCounter>);
 
 struct SizedStrideCounter {
-  int *it_;
-  int *minus_;
+  int* it_;
+  int* minus_;
   using value_type = int;
   explicit SizedStrideCounter();
-  constexpr explicit SizedStrideCounter(int *it, int *minus) : it_(it), minus_(minus) {}
+  constexpr explicit SizedStrideCounter(int* it, int* minus) : it_(it), minus_(minus) {}
   SizedStrideCounter& operator++();
   SizedStrideCounter operator++(int);
   int& operator*() const;
@@ -147,22 +154,34 @@ constexpr void test_stride_counting() {
     int a[] = {1, 2, 3};
     int inc = 0;
     StrideCounter first(a, &inc);
-    StrideCounter last(a+3, nullptr);
+    StrideCounter last(a + 3, nullptr);
     std::same_as<int> auto result = std::ranges::distance(first, last);
     assert(result == 3);
     assert(inc == 3);
   }
   {
-    int a[] = {1, 2, 3};
+    int a[]   = {1, 2, 3};
     int minus = 0;
     SizedStrideCounter first(a, &minus);
-    SizedStrideCounter last(a+3, nullptr);
+    SizedStrideCounter last(a + 3, nullptr);
     std::same_as<int> auto result = std::ranges::distance(first, last);
     assert(result == 3);
     assert(minus == 1);
   }
 }
 
+/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+  using Container = std::deque<std::deque<double>>;
+  Container c;
+  auto view                    = c | std::views::join;
+  Container::difference_type n = 0;
+  for (std::size_t i = 0; i < 10; ++i) {
+    n += i;
+    c.push_back(Container::value_type(i));
+  }
+  assert(std::ranges::distance(view.begin(), view.end()) == n);
+}
+
 constexpr bool test() {
   {
     int a[] = {1, 2, 3};
@@ -197,7 +216,7 @@ constexpr bool test() {
   test_sized<contiguous_iterator<int*>, contiguous_iterator<int*>>();
 
   {
-    using It = cpp20_input_iterator<int*>;  // non-copyable, thus not a sentinel for itself
+    using It = cpp20_input_iterator<int*>; // non-copyable, thus not a sentinel for itself
     static_assert(!std::is_copy_constructible_v<It>);
     static_assert(!std::sentinel_for<It, It>);
     static_assert(!std::is_invocable_v<decltype(std::ranges::distance), It&, It&>);
@@ -206,10 +225,10 @@ constexpr bool test() {
     static_assert(!std::is_invocable_v<decltype(std::ranges::distance), It&&, It&&>);
   }
   {
-    using It = cpp20_input_iterator<int*>;  // non-copyable
-    using Sent = sentinel_wrapper<It>;  // not a sized sentinel
+    using It   = cpp20_input_iterator<int*>; // non-copyable
+    using Sent = sentinel_wrapper<It>;       // not a sized sentinel
     static_assert(std::sentinel_for<Sent, It> && !std::sized_sentinel_for<Sent, It>);
-    int a[] = {1,2,3};
+    int a[]   = {1, 2, 3};
     Sent last = Sent(It(a + 3));
     static_assert(!std::is_invocable_v<decltype(std::ranges::distance), It&, Sent&>);
     static_assert(!std::is_invocable_v<decltype(std::ranges::distance), It&, Sent&&>);
@@ -217,7 +236,7 @@ constexpr bool test() {
     assert(std::ranges::distance(It(a), Sent(It(a + 3))) == 3);
   }
   {
-    using It = cpp17_input_iterator<int*>;  // not a sentinel for itself
+    using It = cpp17_input_iterator<int*>; // not a sentinel for itself
     static_assert(!std::sentinel_for<It, It>);
     static_assert(!std::is_invocable_v<decltype(std::ranges::distance), It&, It&>);
     static_assert(!std::is_invocable_v<decltype(std::ranges::distance), It&, It&&>);
@@ -231,6 +250,26 @@ constexpr bool test() {
   static_assert(!std::is_invocable_v<decltype(std::ranges::distance), int, int*>);
   static_assert(!std::is_invocable_v<decltype(std::ranges::distance), int*, char*>);
 
+  {
+    using Container = std::vector<std::vector<int>>;
+    Container c;
+    auto view                    = c | std::views::join;
+    Container::difference_type n = 0;
+    for (std::size_t i = 0; i < 10; ++i) {
+      n += i;
+      c.push_back(Container::value_type(i));
+    }
+    assert(std::ranges::distance(view.begin(), view.end()) == n);
+  }
+  {
+    using Container = std::array<std::array<char, 3>, 10>;
+    Container c;
+    auto view = c | std::views::join;
+    assert(std::ranges::distance(view.begin(), view.end()) == 30);
+  }
+  if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+    test_deque();
+
   return true;
 }
 
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp
index 5264e770..881a5d2 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp
@@ -226,7 +226,9 @@ constexpr bool test() {
 
 #ifdef _LIBCPP_VERSION
   // These types should be implicit-lifetime, but they are not guaranteed to be so.
+#  ifndef _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR
   test_is_implicit_lifetime<std::pair<int, float>>();
+#  endif
   test_is_implicit_lifetime<std::tuple<int, float>>();
 #endif
 
diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml
index d564ea6..ca83af9 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -122,8 +122,8 @@ steps:
   - label: FreeBSD 13 amd64
     command: libcxx/utils/ci/run-buildbot generic-cxx26
     env:
-      CC: clang19
-      CXX: clang++19
+      CC: clang20
+      CXX: clang++20
     agents:
       queue: libcxx-builders
       os: freebsd
diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md
index 93fb0c9..f0c5e6b 100644
--- a/lldb/docs/resources/lldbgdbremote.md
+++ b/lldb/docs/resources/lldbgdbremote.md
@@ -738,6 +738,57 @@ This is a performance optimization, which speeds up debugging by avoiding
 multiple round-trips for retrieving thread information. The information from this
 packet can be retrieved using a combination of `qThreadStopInfo` and `m` packets.
 
+### MultiMemRead
+
+Read memory from multiple memory ranges.
+
+This packet has one argument:
+
+* `ranges`: a list of pairs of numbers, formatted in base-16. Each pair is
+separated by a `,`, as is each number in each pair. The first number of the
+pair denotes the base address of the memory read, the second denotes the number
+of bytes to be read. The list must end with a `;`.
+
+The reply packet starts with a comma-separated list of numbers formatted in
+base-16, denoting how many bytes were read from each range, in the same order
+as the request packet. The list is followed by a `;`, followed by a sequence of
+bytes containing binary encoded data for all memory that was read. The length
+of the binary encodeed data, after being decoded as required by the GDB remote
+protocol, must be equal to the sum of the numbers provided at the start of the
+reply. The order of the binary data is the same as the order of the ranges in
+the request packet.
+
+If some part of a range is not readable, the stub may perform a partial read of
+a prefix of the range. In other words, partial reads will only ever be from the
+start of the range, never the middle or end. Support for partial reads depends
+on the debug stub.
+
+If, by applying the rules above, the stub has read zero bytes from a range, it
+must return a length of zero for that range in the reply packet; no bytes for
+this memory range are included in the sequence of bytes that follows.
+
+A stub that supports this packet must include `MultiMemRead+` in the reply to
+`qSupported`.
+
+```
+send packet: $MultiMemRead:ranges:100a00,4,200200,a0,400000,4;
+read packet: $4,0,2;<binary encoding of abcd1000><binary encoding of eeff>
+```
+
+In the example above, the first read produced `abcd1000`, the read of `a0`
+bytes from address `200200` failed to read any bytes, and the third read
+produced two bytes – `eeff` – out of the four requested.
+
+```
+send packet: $MultiMemRead:ranges:100a00,0;
+read packet: $0;
+```
+
+In the example above, a read of zero bytes was requested.
+
+**Priority to Implement:** Only required for performance, the debugger will
+fall back to doing separate read requests if this packet is unavailable.
+
 ## QEnvironment:NAME=VALUE
 
 Setup the environment up for a new child process that will soon be
diff --git a/lldb/include/lldb/Utility/XcodeSDK.h b/lldb/include/lldb/Utility/XcodeSDK.h
index 5b345a4..5f89019 100644
--- a/lldb/include/lldb/Utility/XcodeSDK.h
+++ b/lldb/include/lldb/Utility/XcodeSDK.h
@@ -38,7 +38,7 @@ public:
     watchOS,
     XRSimulator,
     XROS,
-    bridgeOS,
+    BridgeOS,
     Linux,
     unknown = -1
   };
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 8eb64b4..a3d924d 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -27,6 +27,10 @@ from typing import (
     Literal,
 )
 
+# set timeout based on whether ASAN was enabled or not. Increase
+# timeout by a factor of 10 if ASAN is enabled.
+DEFAULT_TIMEOUT = 10 * (10 if ("ASAN_OPTIONS" in os.environ) else 1)
+
 ## DAP type references
 
 
@@ -282,26 +286,24 @@ class DebugCommunication(object):
     def collect_output(
         self,
         category: str,
-        timeout: float,
         pattern: Optional[str] = None,
         clear=True,
     ) -> str:
         """Collect output from 'output' events.
         Args:
             category: The category to collect.
-            timeout: The max duration for collecting output.
             pattern:
                 Optional, if set, return once this pattern is detected in the
                 collected output.
         Returns:
             The collected output.
         """
-        deadline = time.monotonic() + timeout
+        deadline = time.monotonic() + DEFAULT_TIMEOUT
         output = self.get_output(category, clear)
         while deadline >= time.monotonic() and (
             pattern is None or pattern not in output
         ):
-            event = self.wait_for_event(["output"], timeout=deadline - time.monotonic())
+            event = self.wait_for_event(["output"])
             if not event:  # Timeout or EOF
                 break
             output += self.get_output(category, clear=clear)
@@ -339,7 +341,7 @@ class DebugCommunication(object):
         self,
         *,
         predicate: Optional[Callable[[ProtocolMessage], bool]] = None,
-        timeout: Optional[float] = None,
+        timeout: Optional[float] = DEFAULT_TIMEOUT,
     ) -> Optional[ProtocolMessage]:
         """Processes received packets from the adapter.
         Updates the DebugCommunication stateful properties based on the received
@@ -555,25 +557,20 @@ class DebugCommunication(object):
 
         return cast(Optional[Response], self._recv_packet(predicate=predicate))
 
-    def wait_for_event(
-        self, filter: List[str] = [], timeout: Optional[float] = None
-    ) -> Optional[Event]:
+    def wait_for_event(self, filter: List[str] = []) -> Optional[Event]:
         """Wait for the first event that matches the filter."""
 
         def predicate(p: ProtocolMessage):
             return p["type"] == "event" and p["event"] in filter
 
         return cast(
-            Optional[Event], self._recv_packet(predicate=predicate, timeout=timeout)
+            Optional[Event],
+            self._recv_packet(predicate=predicate),
         )
 
-    def wait_for_stopped(
-        self, timeout: Optional[float] = None
-    ) -> Optional[List[Event]]:
+    def wait_for_stopped(self) -> Optional[List[Event]]:
         stopped_events = []
-        stopped_event = self.wait_for_event(
-            filter=["stopped", "exited"], timeout=timeout
-        )
+        stopped_event = self.wait_for_event(filter=["stopped", "exited"])
         while stopped_event:
             stopped_events.append(stopped_event)
             # If we exited, then we are done
@@ -582,26 +579,28 @@ class DebugCommunication(object):
             # Otherwise we stopped and there might be one or more 'stopped'
             # events for each thread that stopped with a reason, so keep
             # checking for more 'stopped' events and return all of them
-            stopped_event = self.wait_for_event(
-                filter=["stopped", "exited"], timeout=0.25
+            # Use a shorter timeout for additional stopped events
+            def predicate(p: ProtocolMessage):
+                return p["type"] == "event" and p["event"] in ["stopped", "exited"]
+
+            stopped_event = cast(
+                Optional[Event], self._recv_packet(predicate=predicate, timeout=0.25)
             )
         return stopped_events
 
-    def wait_for_breakpoint_events(self, timeout: Optional[float] = None):
+    def wait_for_breakpoint_events(self):
         breakpoint_events: list[Event] = []
         while True:
-            event = self.wait_for_event(["breakpoint"], timeout=timeout)
+            event = self.wait_for_event(["breakpoint"])
             if not event:
                 break
             breakpoint_events.append(event)
         return breakpoint_events
 
-    def wait_for_breakpoints_to_be_verified(
-        self, breakpoint_ids: list[str], timeout: Optional[float] = None
-    ):
+    def wait_for_breakpoints_to_be_verified(self, breakpoint_ids: list[str]):
         """Wait for all breakpoints to be verified. Return all unverified breakpoints."""
         while any(id not in self.resolved_breakpoints for id in breakpoint_ids):
-            breakpoint_event = self.wait_for_event(["breakpoint"], timeout=timeout)
+            breakpoint_event = self.wait_for_event(["breakpoint"])
             if breakpoint_event is None:
                 break
 
@@ -614,14 +613,14 @@ class DebugCommunication(object):
             )
         ]
 
-    def wait_for_exited(self, timeout: Optional[float] = None):
-        event_dict = self.wait_for_event(["exited"], timeout=timeout)
+    def wait_for_exited(self):
+        event_dict = self.wait_for_event(["exited"])
         if event_dict is None:
             raise ValueError("didn't get exited event")
         return event_dict
 
-    def wait_for_terminated(self, timeout: Optional[float] = None):
-        event_dict = self.wait_for_event(["terminated"], timeout)
+    def wait_for_terminated(self):
+        event_dict = self.wait_for_event(["terminated"])
         if event_dict is None:
             raise ValueError("didn't get terminated event")
         return event_dict
@@ -1610,7 +1609,7 @@ class DebugAdapterServer(DebugCommunication):
                     # new messages will arrive and it should shutdown on its
                     # own.
                     process.stdin.close()
-                    process.wait(timeout=20)
+                    process.wait(timeout=DEFAULT_TIMEOUT)
                 except subprocess.TimeoutExpired:
                     process.kill()
                     process.wait()
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
index f7b1ed8..29935bb 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
@@ -18,7 +18,7 @@ import base64
 class DAPTestCaseBase(TestBase):
     # set timeout based on whether ASAN was enabled or not. Increase
     # timeout by a factor of 10 if ASAN is enabled.
-    DEFAULT_TIMEOUT = 10 * (10 if ("ASAN_OPTIONS" in os.environ) else 1)
+    DEFAULT_TIMEOUT = dap_server.DEFAULT_TIMEOUT
     NO_DEBUG_INFO_TESTCASE = True
 
     def create_debug_adapter(
@@ -118,11 +118,9 @@ class DAPTestCaseBase(TestBase):
             self.wait_for_breakpoints_to_resolve(breakpoint_ids)
         return breakpoint_ids
 
-    def wait_for_breakpoints_to_resolve(
-        self, breakpoint_ids: list[str], timeout: Optional[float] = DEFAULT_TIMEOUT
-    ):
+    def wait_for_breakpoints_to_resolve(self, breakpoint_ids: list[str]):
         unresolved_breakpoints = self.dap_server.wait_for_breakpoints_to_be_verified(
-            breakpoint_ids, timeout
+            breakpoint_ids
         )
         self.assertEqual(
             len(unresolved_breakpoints),
@@ -134,11 +132,10 @@ class DAPTestCaseBase(TestBase):
         self,
         predicate: Callable[[], bool],
         delay: float = 0.5,
-        timeout: float = DEFAULT_TIMEOUT,
     ) -> bool:
         """Repeatedly run the predicate until either the predicate returns True
         or a timeout has occurred."""
-        deadline = time.monotonic() + timeout
+        deadline = time.monotonic() + self.DEFAULT_TIMEOUT
         while deadline > time.monotonic():
             if predicate():
                 return True
@@ -155,15 +152,13 @@ class DAPTestCaseBase(TestBase):
         if key in self.dap_server.capabilities:
             self.assertEqual(self.dap_server.capabilities[key], False, msg)
 
-    def verify_breakpoint_hit(
-        self, breakpoint_ids: List[Union[int, str]], timeout: float = DEFAULT_TIMEOUT
-    ):
+    def verify_breakpoint_hit(self, breakpoint_ids: List[Union[int, str]]):
         """Wait for the process we are debugging to stop, and verify we hit
         any breakpoint location in the "breakpoint_ids" array.
         "breakpoint_ids" should be a list of breakpoint ID strings
         (["1", "2"]). The return value from self.set_source_breakpoints()
         or self.set_function_breakpoints() can be passed to this function"""
-        stopped_events = self.dap_server.wait_for_stopped(timeout)
+        stopped_events = self.dap_server.wait_for_stopped()
         normalized_bp_ids = [str(b) for b in breakpoint_ids]
         for stopped_event in stopped_events:
             if "body" in stopped_event:
@@ -186,11 +181,11 @@ class DAPTestCaseBase(TestBase):
             f"breakpoint not hit, wanted breakpoint_ids {breakpoint_ids} in stopped_events {stopped_events}",
         )
 
-    def verify_all_breakpoints_hit(self, breakpoint_ids, timeout=DEFAULT_TIMEOUT):
+    def verify_all_breakpoints_hit(self, breakpoint_ids):
         """Wait for the process we are debugging to stop, and verify we hit
         all of the breakpoint locations in the "breakpoint_ids" array.
         "breakpoint_ids" should be a list of int breakpoint IDs ([1, 2])."""
-        stopped_events = self.dap_server.wait_for_stopped(timeout)
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
@@ -208,12 +203,12 @@ class DAPTestCaseBase(TestBase):
                     return
         self.assertTrue(False, f"breakpoints not hit, stopped_events={stopped_events}")
 
-    def verify_stop_exception_info(self, expected_description, timeout=DEFAULT_TIMEOUT):
+    def verify_stop_exception_info(self, expected_description):
         """Wait for the process we are debugging to stop, and verify the stop
         reason is 'exception' and that the description matches
         'expected_description'
         """
-        stopped_events = self.dap_server.wait_for_stopped(timeout)
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
@@ -338,26 +333,14 @@ class DAPTestCaseBase(TestBase):
     def get_important(self):
         return self.dap_server.get_output("important")
 
-    def collect_stdout(
-        self, timeout: float = DEFAULT_TIMEOUT, pattern: Optional[str] = None
-    ) -> str:
-        return self.dap_server.collect_output(
-            "stdout", timeout=timeout, pattern=pattern
-        )
+    def collect_stdout(self, pattern: Optional[str] = None) -> str:
+        return self.dap_server.collect_output("stdout", pattern=pattern)
 
-    def collect_console(
-        self, timeout: float = DEFAULT_TIMEOUT, pattern: Optional[str] = None
-    ) -> str:
-        return self.dap_server.collect_output(
-            "console", timeout=timeout, pattern=pattern
-        )
+    def collect_console(self, pattern: Optional[str] = None) -> str:
+        return self.dap_server.collect_output("console", pattern=pattern)
 
-    def collect_important(
-        self, timeout: float = DEFAULT_TIMEOUT, pattern: Optional[str] = None
-    ) -> str:
-        return self.dap_server.collect_output(
-            "important", timeout=timeout, pattern=pattern
-        )
+    def collect_important(self, pattern: Optional[str] = None) -> str:
+        return self.dap_server.collect_output("important", pattern=pattern)
 
     def get_local_as_int(self, name, threadId=None):
         value = self.dap_server.get_local_variable_value(name, threadId=threadId)
@@ -393,14 +376,13 @@ class DAPTestCaseBase(TestBase):
         targetId=None,
         waitForStop=True,
         granularity="statement",
-        timeout=DEFAULT_TIMEOUT,
     ):
         response = self.dap_server.request_stepIn(
             threadId=threadId, targetId=targetId, granularity=granularity
         )
         self.assertTrue(response["success"])
         if waitForStop:
-            return self.dap_server.wait_for_stopped(timeout)
+            return self.dap_server.wait_for_stopped()
         return None
 
     def stepOver(
@@ -408,7 +390,6 @@ class DAPTestCaseBase(TestBase):
         threadId=None,
         waitForStop=True,
         granularity="statement",
-        timeout=DEFAULT_TIMEOUT,
     ):
         response = self.dap_server.request_next(
             threadId=threadId, granularity=granularity
@@ -417,40 +398,40 @@ class DAPTestCaseBase(TestBase):
             response["success"], f"next request failed: response {response}"
         )
         if waitForStop:
-            return self.dap_server.wait_for_stopped(timeout)
+            return self.dap_server.wait_for_stopped()
         return None
 
-    def stepOut(self, threadId=None, waitForStop=True, timeout=DEFAULT_TIMEOUT):
+    def stepOut(self, threadId=None, waitForStop=True):
         self.dap_server.request_stepOut(threadId=threadId)
         if waitForStop:
-            return self.dap_server.wait_for_stopped(timeout)
+            return self.dap_server.wait_for_stopped()
         return None
 
     def do_continue(self):  # `continue` is a keyword.
         resp = self.dap_server.request_continue()
         self.assertTrue(resp["success"], f"continue request failed: {resp}")
 
-    def continue_to_next_stop(self, timeout=DEFAULT_TIMEOUT):
+    def continue_to_next_stop(self):
         self.do_continue()
-        return self.dap_server.wait_for_stopped(timeout)
+        return self.dap_server.wait_for_stopped()
 
-    def continue_to_breakpoint(self, breakpoint_id: str, timeout=DEFAULT_TIMEOUT):
-        self.continue_to_breakpoints((breakpoint_id), timeout)
+    def continue_to_breakpoint(self, breakpoint_id: str):
+        self.continue_to_breakpoints((breakpoint_id))
 
-    def continue_to_breakpoints(self, breakpoint_ids, timeout=DEFAULT_TIMEOUT):
+    def continue_to_breakpoints(self, breakpoint_ids):
         self.do_continue()
-        self.verify_breakpoint_hit(breakpoint_ids, timeout)
+        self.verify_breakpoint_hit(breakpoint_ids)
 
-    def continue_to_exception_breakpoint(self, filter_label, timeout=DEFAULT_TIMEOUT):
+    def continue_to_exception_breakpoint(self, filter_label):
         self.do_continue()
         self.assertTrue(
-            self.verify_stop_exception_info(filter_label, timeout),
+            self.verify_stop_exception_info(filter_label),
             'verify we got "%s"' % (filter_label),
         )
 
-    def continue_to_exit(self, exitCode=0, timeout=DEFAULT_TIMEOUT):
+    def continue_to_exit(self, exitCode=0):
         self.do_continue()
-        stopped_events = self.dap_server.wait_for_stopped(timeout)
+        stopped_events = self.dap_server.wait_for_stopped()
         self.assertEqual(
             len(stopped_events), 1, "stopped_events = {}".format(stopped_events)
         )
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
index aea6b9f..5ba642b 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
@@ -931,6 +931,7 @@ class GdbRemoteTestCaseBase(Base, metaclass=GdbRemoteTestCaseFactory):
         "QNonStop",
         "SupportedWatchpointTypes",
         "SupportedCompressions",
+        "MultiMemRead",
     ]
 
     def parse_qSupported_response(self, context):
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index 7120892..96a282c 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -1221,13 +1221,12 @@ static Status LaunchProcessPosixSpawn(const char *exe_path,
     //
     using posix_spawnattr_set_use_sec_transition_shims_np_t =
         int (*)(posix_spawnattr_t *attr, uint32_t flags);
-    auto posix_spawnattr_set_use_sec_transition_shims_np_fn =
+    auto posix_spawnattr_enable_memory_tagging_fn =
         (posix_spawnattr_set_use_sec_transition_shims_np_t)dlsym(
             RTLD_DEFAULT, "posix_spawnattr_set_use_sec_transition_shims_np");
-    if (posix_spawnattr_set_use_sec_transition_shims_np_fn) {
-      error =
-          Status(posix_spawnattr_set_use_sec_transition_shims_np_fn(&attr, 0),
-                 eErrorTypePOSIX);
+    if (posix_spawnattr_enable_memory_tagging_fn) {
+      error = Status(posix_spawnattr_enable_memory_tagging_fn(&attr, 0),
+                     eErrorTypePOSIX);
       if (error.Fail()) {
         LLDB_LOG(log,
                  "error: {0}, "
diff --git a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp
index e6c8c8b..7bf99ce 100644
--- a/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp
+++ b/lldb/source/Plugins/ABI/LoongArch/ABISysV_loongarch.cpp
@@ -597,15 +597,16 @@ bool ABISysV_loongarch::RegisterIsCalleeSaved(const RegisterInfo *reg_info) {
 
   return llvm::StringSwitch<bool>(name)
       // integer ABI names
-      .Cases("ra", "sp", "fp", true)
-      .Cases("s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", true)
+      .Cases({"ra", "sp", "fp"}, true)
+      .Cases({"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9"}, true)
       // integer hardware names
-      .Cases("r1", "r3", "r22", true)
-      .Cases("r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "31", true)
+      .Cases({"r1", "r3", "r22"}, true)
+      .Cases({"r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "31"},
+             true)
       // floating point ABI names
-      .Cases("fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", is_hw_fp)
+      .Cases({"fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}, is_hw_fp)
       // floating point hardware names
-      .Cases("f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", is_hw_fp)
+      .Cases({"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"}, is_hw_fp)
       .Default(false);
 }
 
diff --git a/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp b/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp
index b313ca0..822c93d 100644
--- a/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp
+++ b/lldb/source/Plugins/ABI/RISCV/ABISysV_riscv.cpp
@@ -783,21 +783,22 @@ bool ABISysV_riscv::RegisterIsCalleeSaved(const RegisterInfo *reg_info) {
   bool is_callee_saved =
       llvm::StringSwitch<bool>(name)
           // integer ABI names
-          .Cases("ra", "sp", "fp", true)
-          .Cases("s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9",
+          .Cases({"ra", "sp", "fp"}, true)
+          .Cases({"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9"},
                  true)
-          .Cases("s10", "s11", true)
+          .Cases({"s10", "s11"}, true)
           // integer hardware names
-          .Cases("x1", "x2", "x8", "x9", "x18", "x19", "x20", "x21", "x22",
+          .Cases({"x1", "x2", "x8", "x9", "x18", "x19", "x20", "x21", "x22"},
                  true)
-          .Cases("x23", "x24", "x25", "x26", "x27", true)
+          .Cases({"x23", "x24", "x25", "x26", "x27"}, true)
           // floating point ABI names
-          .Cases("fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7",
+          .Cases({"fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"},
                  is_hw_fp)
-          .Cases("fs8", "fs9", "fs10", "fs11", is_hw_fp)
+          .Cases({"fs8", "fs9", "fs10", "fs11"}, is_hw_fp)
           // floating point hardware names
-          .Cases("f8", "f9", "f18", "f19", "f20", "f21", "f22", "f23", is_hw_fp)
-          .Cases("f24", "f25", "f26", "f27", is_hw_fp)
+          .Cases({"f8", "f9", "f18", "f19", "f20", "f21", "f22", "f23"},
+                 is_hw_fp)
+          .Cases({"f24", "f25", "f26", "f27"}, is_hw_fp)
           .Default(false);
 
   return is_callee_saved;
diff --git a/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp b/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp
index 7646ccd..effb3de 100644
--- a/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp
@@ -929,8 +929,8 @@ bool ABISysV_x86_64::RegisterIsCalleeSaved(const RegisterInfo *reg_info) {
   std::string Name = std::string(reg_info->name);
   bool IsCalleeSaved =
       llvm::StringSwitch<bool>(Name)
-          .Cases("r12", "r13", "r14", "r15", "rbp", "ebp", "rbx", "ebx", true)
-          .Cases("rip", "eip", "rsp", "esp", "sp", "fp", "pc", true)
+          .Cases({"r12", "r13", "r14", "r15", "rbp", "ebp", "rbx", "ebx"}, true)
+          .Cases({"rip", "eip", "rsp", "esp", "sp", "fp", "pc"}, true)
           .Default(false);
   return IsCalleeSaved;
 }
diff --git a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp
index 56df6f6..339012c 100644
--- a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp
@@ -797,10 +797,11 @@ bool ABIWindows_x86_64::RegisterIsCalleeSaved(const RegisterInfo *reg_info) {
   std::string Name = std::string(reg_info->name);
   bool IsCalleeSaved =
       llvm::StringSwitch<bool>(Name)
-          .Cases("rbx", "ebx", "rbp", "ebp", "rdi", "edi", "rsi", "esi", true)
-          .Cases("rsp", "esp", "r12", "r13", "r14", "r15", "sp", "fp", true)
-          .Cases("xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12",
-                 "xmm13", "xmm14", "xmm15", true)
+          .Cases({"rbx", "ebx", "rbp", "ebp", "rdi", "edi", "rsi", "esi"}, true)
+          .Cases({"rsp", "esp", "r12", "r13", "r14", "r15", "sp", "fp"}, true)
+          .Cases({"xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12",
+                  "xmm13", "xmm14", "xmm15"},
+                 true)
           .Default(false);
   return IsCalleeSaved;
 }
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp
index 92094c0..e3b6ff8 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp
@@ -332,8 +332,7 @@ CompilerType ClangASTImporter::DeportType(TypeSystemClang &dst,
   DeclContextOverride decl_context_override;
 
   if (auto *t = ClangUtil::GetQualType(src_type)->getAs<TagType>())
-    decl_context_override.OverrideAllDeclsFromContainingFunction(
-        t->getOriginalDecl());
+    decl_context_override.OverrideAllDeclsFromContainingFunction(t->getDecl());
 
   CompleteTagDeclsScope complete_scope(*this, &dst.getASTContext(),
                                        &src_ctxt->getASTContext());
@@ -393,7 +392,7 @@ bool ClangASTImporter::CanImport(const CompilerType &type) {
   case clang::Type::Record:
     return CanImport(qual_type->getAsCXXRecordDecl());
   case clang::Type::Enum:
-    return CanImport(llvm::cast<clang::EnumType>(qual_type)->getOriginalDecl());
+    return CanImport(llvm::cast<clang::EnumType>(qual_type)->getDecl());
   case clang::Type::ObjCObject:
   case clang::Type::ObjCInterface: {
     const clang::ObjCObjectType *objc_class_type =
@@ -452,7 +451,7 @@ bool ClangASTImporter::Import(const CompilerType &type) {
 
   case clang::Type::Enum: {
     clang::EnumDecl *enum_decl =
-        llvm::cast<clang::EnumType>(qual_type)->getOriginalDecl();
+        llvm::cast<clang::EnumType>(qual_type)->getDecl();
     if (enum_decl) {
       if (GetDeclOrigin(enum_decl).Valid())
         return CompleteAndFetchChildren(qual_type);
@@ -591,7 +590,7 @@ bool ExtractBaseOffsets(const ASTRecordLayout &record_layout,
       return false;
 
     DeclFromUser<RecordDecl> origin_base_record(
-        origin_base_record_type->getOriginalDecl());
+        origin_base_record_type->getDecl());
 
     if (origin_base_record.IsInvalid())
       return false;
@@ -722,8 +721,7 @@ bool ClangASTImporter::importRecordLayoutFromOrigin(
 
         QualType base_type = bi->getType();
         const RecordType *base_record_type = base_type->getAs<RecordType>();
-        DeclFromParser<RecordDecl> base_record(
-            base_record_type->getOriginalDecl());
+        DeclFromParser<RecordDecl> base_record(base_record_type->getDecl());
         DeclFromParser<CXXRecordDecl> base_cxx_record =
             DynCast<CXXRecordDecl>(base_record);
 
@@ -855,7 +853,7 @@ bool ClangASTImporter::CompleteAndFetchChildren(clang::QualType type) {
   Log *log = GetLog(LLDBLog::Expressions);
 
   if (const TagType *tag_type = type->getAs<TagType>()) {
-    TagDecl *tag_decl = tag_type->getOriginalDecl();
+    TagDecl *tag_decl = tag_type->getDecl();
 
     DeclOrigin decl_origin = GetDeclOrigin(tag_decl);
 
@@ -923,7 +921,7 @@ bool ClangASTImporter::RequireCompleteType(clang::QualType type) {
     return false;
 
   if (const TagType *tag_type = type->getAs<TagType>()) {
-    TagDecl *tag_decl = tag_type->getOriginalDecl();
+    TagDecl *tag_decl = tag_type->getDecl();
 
     if (tag_decl->getDefinition())
       return true;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index 21a9307..ebe7be4 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -223,7 +223,7 @@ TagDecl *ClangASTSource::FindCompleteType(const TagDecl *decl) {
           continue;
 
         TagDecl *candidate_tag_decl =
-            tag_type->getOriginalDecl()->getDefinitionOrSelf();
+            tag_type->getDecl()->getDefinitionOrSelf();
 
         if (TypeSystemClang::GetCompleteDecl(
                 &candidate_tag_decl->getASTContext(), candidate_tag_decl))
@@ -250,8 +250,7 @@ TagDecl *ClangASTSource::FindCompleteType(const TagDecl *decl) {
       if (!tag_type)
         continue;
 
-      TagDecl *candidate_tag_decl =
-          tag_type->getOriginalDecl()->getDefinitionOrSelf();
+      TagDecl *candidate_tag_decl = tag_type->getDecl()->getDefinitionOrSelf();
 
       if (TypeSystemClang::GetCompleteDecl(&candidate_tag_decl->getASTContext(),
                                            candidate_tag_decl))
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index 8a68282..833bc3b 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -1561,7 +1561,7 @@ ClangExpressionDeclMap::AddExpressionVariable(NameSearchContext &context,
 
   if (const clang::Type *parser_type = parser_opaque_type.getTypePtr()) {
     if (const TagType *tag_type = dyn_cast<TagType>(parser_type))
-      CompleteType(tag_type->getOriginalDecl()->getDefinitionOrSelf());
+      CompleteType(tag_type->getDecl()->getDefinitionOrSelf());
     if (const ObjCObjectPointerType *objc_object_ptr_type =
             dyn_cast<ObjCObjectPointerType>(parser_type))
       CompleteType(objc_object_ptr_type->getInterfaceDecl());
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp b/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp
index 6f57c18..794b194 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp
@@ -153,7 +153,7 @@ NameSearchContext::AddTypeDecl(const CompilerType &clang_type) {
 
       return (NamedDecl *)typedef_name_decl;
     } else if (const TagType *tag_type = qual_type->getAs<TagType>()) {
-      TagDecl *tag_decl = tag_type->getOriginalDecl()->getDefinitionOrSelf();
+      TagDecl *tag_decl = tag_type->getDecl()->getDefinitionOrSelf();
 
       m_decls.push_back(tag_decl);
 
diff --git a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlAtomic.cpp b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlAtomic.cpp
index c871861..020ba1016 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/MsvcStlAtomic.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/MsvcStlAtomic.cpp
@@ -50,7 +50,7 @@ llvm::Expected<uint32_t> lldb_private::formatters::
 lldb::ValueObjectSP
 lldb_private::formatters::MsvcStlAtomicSyntheticFrontEnd::GetChildAtIndex(
     uint32_t idx) {
-  if (idx == 0)
+  if (idx == 0 && m_storage && m_element_type.IsValid())
     return m_storage->Cast(m_element_type)->Clone(ConstString("Value"));
   return nullptr;
 }
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
index 6d8f41a..460c503 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
@@ -260,6 +260,7 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process,
   uint32_t entsize = extractor.GetU32_unchecked(&cursor);
   m_is_small = (entsize & 0x80000000) != 0;
   m_has_direct_selector = (entsize & 0x40000000) != 0;
+  m_has_relative_types = (entsize & 0x20000000) != 0;
   m_entsize = entsize & 0xfffc;
   m_count = extractor.GetU32_unchecked(&cursor);
   m_first_ptr = addr + cursor;
@@ -269,8 +270,9 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process,
 
 llvm::SmallVector<ClassDescriptorV2::method_t, 0>
 ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
-                               lldb::addr_t relative_selector_base_addr,
-                               bool is_small, bool has_direct_sel) const {
+                               lldb::addr_t relative_string_base_addr,
+                               bool is_small, bool has_direct_sel,
+                               bool has_relative_types) const {
   lldb_private::Process *process = m_runtime.GetProcess();
   if (!process)
     return {};
@@ -297,8 +299,8 @@ ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
                             process->GetByteOrder(),
                             process->GetAddressByteSize());
     methods.push_back(method_t());
-    methods.back().Read(extractor, process, addr, relative_selector_base_addr,
-                        is_small, has_direct_sel);
+    methods.back().Read(extractor, process, addr, relative_string_base_addr,
+                        is_small, has_direct_sel, has_relative_types);
   }
 
   return methods;
@@ -306,8 +308,9 @@ ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
 
 bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor,
                                        Process *process, lldb::addr_t addr,
-                                       lldb::addr_t relative_selector_base_addr,
-                                       bool is_small, bool has_direct_sel) {
+                                       lldb::addr_t relative_string_base_addr,
+                                       bool is_small, bool has_direct_sel,
+                                       bool has_relative_types) {
   lldb::offset_t cursor = 0;
 
   if (is_small) {
@@ -323,10 +326,13 @@ bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor,
       m_name_ptr = process->ReadPointerFromMemory(m_name_ptr, error);
       if (error.Fail())
         return false;
-    } else if (relative_selector_base_addr != LLDB_INVALID_ADDRESS) {
-      m_name_ptr = relative_selector_base_addr + nameref_offset;
+    } else if (relative_string_base_addr != LLDB_INVALID_ADDRESS) {
+      m_name_ptr = relative_string_base_addr + nameref_offset;
     }
-    m_types_ptr = addr + 4 + types_offset;
+    if (has_relative_types)
+      m_types_ptr = relative_string_base_addr + types_offset;
+    else
+      m_types_ptr = addr + 4 + types_offset;
     m_imp_ptr = addr + 8 + imp_offset;
   } else {
     m_name_ptr = extractor.GetAddress_unchecked(&cursor);
@@ -481,7 +487,8 @@ bool ClassDescriptorV2::ProcessMethodList(
 
   llvm::SmallVector<method_t, 0> methods =
       ReadMethods(addresses, m_runtime.GetRelativeSelectorBaseAddr(),
-                  method_list.m_is_small, method_list.m_has_direct_selector);
+                  method_list.m_is_small, method_list.m_has_direct_selector,
+                  method_list.m_has_relative_types);
 
   for (const auto &method : methods)
     if (instance_method_func(method.m_name.c_str(), method.m_types.c_str()))
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
index 78b3311..0fff9af 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
@@ -143,6 +143,7 @@ private:
     uint16_t m_entsize;
     bool m_is_small;
     bool m_has_direct_selector;
+    bool m_has_relative_types;
     uint32_t m_count;
     lldb::addr_t m_first_ptr;
 
@@ -173,14 +174,14 @@ private:
     }
 
     bool Read(DataExtractor &extractor, Process *process, lldb::addr_t addr,
-              lldb::addr_t relative_selector_base_addr, bool is_small,
-              bool has_direct_sel);
+              lldb::addr_t relative_string_base_addr, bool is_small,
+              bool has_direct_sel, bool has_relative_types);
   };
 
   llvm::SmallVector<method_t, 0>
   ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
-              lldb::addr_t relative_selector_base_addr, bool is_small,
-              bool has_direct_sel) const;
+              lldb::addr_t relative_string_base_addr, bool is_small,
+              bool has_direct_sel, bool has_relative_types) const;
 
   struct ivar_list_t {
     uint32_t m_entsize;
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index cd72454..5aad447 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -1150,7 +1150,7 @@ void PlatformDarwin::AddClangModuleCompilationOptionsForSDKType(
     case XcodeSDK::Type::XRSimulator:
     case XcodeSDK::Type::XROS:
       // FIXME: Pass the right argument once it exists.
-    case XcodeSDK::Type::bridgeOS:
+    case XcodeSDK::Type::BridgeOS:
     case XcodeSDK::Type::Linux:
     case XcodeSDK::Type::unknown:
       if (Log *log = GetLog(LLDBLog::Host)) {
diff --git a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp b/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
index 15981a2..a8d18f7 100644
--- a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
+++ b/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
@@ -47,7 +47,7 @@ void GDBRemoteSignals::Reset() {
   AddSignal(25,     "SIGXFSZ",      false,    true,   true,   "file size limit exceeded");
   AddSignal(26,     "SIGVTALRM",    false,    true,   true,   "virtual time alarm");
   AddSignal(27,     "SIGPROF",      false,    false,  false,  "profiling time alarm");
-  AddSignal(28,     "SIGWINCH",     false,    true,   true,   "window size changes");
+  AddSignal(28,     "SIGWINCH",     false,    false,   false,   "window size changes");
   AddSignal(29,     "SIGLOST",      false,    true,   true,   "resource lost");
   AddSignal(30,     "SIGUSR1",      false,    true,   true,   "user defined signal 1");
   AddSignal(31,     "SIGUSR2",      false,    true,   true,   "user defined signal 2");
diff --git a/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp b/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp
index 5346bab..dbbfc6a 100644
--- a/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp
+++ b/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp
@@ -160,7 +160,7 @@ void LinuxSignals::Reset() {
   ADD_LINUX_SIGNAL(25,     "SIGXFSZ",      false,    true,   true,   "file size limit exceeded");
   ADD_LINUX_SIGNAL(26,     "SIGVTALRM",    false,    true,   true,   "virtual time alarm");
   ADD_LINUX_SIGNAL(27,     "SIGPROF",      false,    false,  false,  "profiling time alarm");
-  ADD_LINUX_SIGNAL(28,     "SIGWINCH",     false,    true,   true,   "window size changes");
+  ADD_LINUX_SIGNAL(28,     "SIGWINCH",     false,    false,   false,   "window size changes");
   ADD_LINUX_SIGNAL(29,     "SIGIO",        false,    true,   true,   "input/output ready/Pollable event", "SIGPOLL");
   ADD_LINUX_SIGNAL(30,     "SIGPWR",       false,    true,   true,   "power failure");
   ADD_LINUX_SIGNAL(31,     "SIGSYS",       false,    true,   true,   "invalid system call");
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp
index e0f3971..c361b2a 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp
@@ -9,6 +9,7 @@
 #include "RegisterContextFreeBSD_x86_64.h"
 #include "RegisterContextFreeBSD_i386.h"
 #include "RegisterContextPOSIX_x86.h"
+#include "llvm/Support/Threading.h"
 #include <vector>
 
 using namespace lldb_private;
@@ -69,40 +70,34 @@ struct UserArea {
 #include "RegisterInfos_x86_64.h"
 #undef DECLARE_REGISTER_INFOS_X86_64_STRUCT
 
-static std::vector<lldb_private::RegisterInfo> &GetSharedRegisterInfoVector() {
-  static std::vector<lldb_private::RegisterInfo> register_infos;
-  return register_infos;
-}
-
-static const RegisterInfo *
-GetRegisterInfo_i386(const lldb_private::ArchSpec &arch) {
-  static std::vector<lldb_private::RegisterInfo> g_register_infos(
-      GetSharedRegisterInfoVector());
-
-  // Allocate RegisterInfo only once
-  if (g_register_infos.empty()) {
-    // Copy the register information from base class
-    std::unique_ptr<RegisterContextFreeBSD_i386> reg_interface(
-        new RegisterContextFreeBSD_i386(arch));
-    const RegisterInfo *base_info = reg_interface->GetRegisterInfo();
-    g_register_infos.insert(g_register_infos.end(), &base_info[0],
-                            &base_info[k_num_registers_i386]);
+static std::vector<lldb_private::RegisterInfo> &
+GetSharedRegisterInfoVector_i386(const lldb_private::ArchSpec &arch) {
+  static std::vector<lldb_private::RegisterInfo> g_register_infos;
+  static llvm::once_flag g_initialized;
+  llvm::call_once(g_initialized, [&]() {
+    if (g_register_infos.empty()) {
+      // Copy the register information from base class
+      std::unique_ptr<RegisterContextFreeBSD_i386> reg_interface(
+          new RegisterContextFreeBSD_i386(arch));
+      const RegisterInfo *base_info = reg_interface->GetRegisterInfo();
+      g_register_infos.insert(g_register_infos.end(), &base_info[0],
+                              &base_info[k_num_registers_i386]);
 
 // Include RegisterInfos_x86_64 to update the g_register_infos structure
 //  with x86_64 offsets.
 #define UPDATE_REGISTER_INFOS_I386_STRUCT_WITH_X86_64_OFFSETS
 #include "RegisterInfos_x86_64.h"
 #undef UPDATE_REGISTER_INFOS_I386_STRUCT_WITH_X86_64_OFFSETS
-  }
-
-  return &g_register_infos[0];
+    }
+  });
+  return g_register_infos;
 }
 
 static const RegisterInfo *
 PrivateGetRegisterInfoPtr(const lldb_private::ArchSpec &target_arch) {
   switch (target_arch.GetMachine()) {
   case llvm::Triple::x86:
-    return GetRegisterInfo_i386(target_arch);
+    return &GetSharedRegisterInfoVector_i386(target_arch)[0];
   case llvm::Triple::x86_64:
     return g_register_infos_x86_64;
   default:
@@ -116,9 +111,10 @@ PrivateGetRegisterCount(const lldb_private::ArchSpec &target_arch) {
   switch (target_arch.GetMachine()) {
   case llvm::Triple::x86:
     // This vector should have already been filled.
-    assert(!GetSharedRegisterInfoVector().empty() &&
+    assert(!GetSharedRegisterInfoVector_i386(target_arch).empty() &&
            "i386 register info vector not filled.");
-    return static_cast<uint32_t>(GetSharedRegisterInfoVector().size());
+    return static_cast<uint32_t>(
+        GetSharedRegisterInfoVector_i386(target_arch).size());
   case llvm::Triple::x86_64:
     return static_cast<uint32_t>(sizeof(g_register_infos_x86_64) /
                                  sizeof(g_register_infos_x86_64[0]));
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index 7d2bd45..11f164c 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -211,6 +211,12 @@ bool GDBRemoteCommunicationClient::GetReverseStepSupported() {
   return m_supports_reverse_step == eLazyBoolYes;
 }
 
+bool GDBRemoteCommunicationClient::GetMultiMemReadSupported() {
+  if (m_supports_multi_mem_read == eLazyBoolCalculate)
+    GetRemoteQSupported();
+  return m_supports_multi_mem_read == eLazyBoolYes;
+}
+
 bool GDBRemoteCommunicationClient::QueryNoAckModeSupported() {
   if (m_supports_not_sending_acks == eLazyBoolCalculate) {
     m_send_acks = true;
@@ -339,6 +345,7 @@ void GDBRemoteCommunicationClient::ResetDiscoverableSettings(bool did_exec) {
     m_supported_async_json_packets_is_valid = false;
     m_supported_async_json_packets_sp.reset();
     m_supports_jModulesInfo = true;
+    m_supports_multi_mem_read = eLazyBoolCalculate;
   }
 
   // These flags should be reset when we first connect to a GDB server and when
@@ -365,6 +372,7 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
   m_x_packet_state.reset();
   m_supports_reverse_continue = eLazyBoolNo;
   m_supports_reverse_step = eLazyBoolNo;
+  m_supports_multi_mem_read = eLazyBoolNo;
 
   m_max_packet_size = UINT64_MAX; // It's supposed to always be there, but if
                                   // not, we assume no limit
@@ -424,6 +432,8 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
         m_supports_reverse_continue = eLazyBoolYes;
       else if (x == "ReverseStep+")
         m_supports_reverse_step = eLazyBoolYes;
+      else if (x == "MultiMemRead+")
+        m_supports_multi_mem_read = eLazyBoolYes;
       // Look for a list of compressions in the features list e.g.
       // qXfer:features:read+;PacketSize=20000;qEcho+;SupportedCompressions=zlib-
       // deflate,lzma
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
index a765e95..ad590a2 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
@@ -342,6 +342,8 @@ public:
 
   bool GetReverseStepSupported();
 
+  bool GetMultiMemReadSupported();
+
   LazyBool SupportsAllocDeallocMemory() // const
   {
     // Uncomment this to have lldb pretend the debug server doesn't respond to
@@ -574,6 +576,7 @@ protected:
   std::optional<xPacketState> m_x_packet_state;
   LazyBool m_supports_reverse_continue = eLazyBoolCalculate;
   LazyBool m_supports_reverse_step = eLazyBoolCalculate;
+  LazyBool m_supports_multi_mem_read = eLazyBoolCalculate;
 
   bool m_supports_qProcessInfoPID : 1, m_supports_qfProcessInfo : 1,
       m_supports_qUserName : 1, m_supports_qGroupName : 1,
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp
index 888bd89..6c66d86 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp
@@ -946,17 +946,21 @@ lldb_private::npdb::GetCompilerTypeForSimpleKind(SimpleTypeKind kind) {
   case SimpleTypeKind::Complex64:
     return lldb::eBasicTypeDoubleComplex;
   case SimpleTypeKind::Complex32:
+  case SimpleTypeKind::Complex32PartialPrecision:
     return lldb::eBasicTypeFloatComplex;
-  case SimpleTypeKind::Float128:
   case SimpleTypeKind::Float80:
     return lldb::eBasicTypeLongDouble;
+  case SimpleTypeKind::Float128:
+    return lldb::eBasicTypeFloat128;
   case SimpleTypeKind::Float64:
     return lldb::eBasicTypeDouble;
   case SimpleTypeKind::Float32:
+  case SimpleTypeKind::Float32PartialPrecision:
     return lldb::eBasicTypeFloat;
   case SimpleTypeKind::Float16:
     return lldb::eBasicTypeHalf;
   case SimpleTypeKind::Int128:
+  case SimpleTypeKind::Int128Oct:
     return lldb::eBasicTypeInt128;
   case SimpleTypeKind::Int64:
   case SimpleTypeKind::Int64Quad:
@@ -967,6 +971,7 @@ lldb_private::npdb::GetCompilerTypeForSimpleKind(SimpleTypeKind kind) {
   case SimpleTypeKind::Int16Short:
     return lldb::eBasicTypeShort;
   case SimpleTypeKind::UInt128:
+  case SimpleTypeKind::UInt128Oct:
     return lldb::eBasicTypeUnsignedInt128;
   case SimpleTypeKind::UInt64:
   case SimpleTypeKind::UInt64Quad:
@@ -985,16 +990,27 @@ lldb_private::npdb::GetCompilerTypeForSimpleKind(SimpleTypeKind kind) {
     return lldb::eBasicTypeVoid;
   case SimpleTypeKind::WideCharacter:
     return lldb::eBasicTypeWChar;
-  default:
+
+  // Not supported.
+  case SimpleTypeKind::Float48:
+  case SimpleTypeKind::Complex16:
+  case SimpleTypeKind::Complex48:
+  case SimpleTypeKind::Complex128:
+  case SimpleTypeKind::NotTranslated:
+  case SimpleTypeKind::None:
     return lldb::eBasicTypeInvalid;
   }
+  return lldb::eBasicTypeInvalid;
 }
 
 size_t lldb_private::npdb::GetTypeSizeForSimpleKind(SimpleTypeKind kind) {
   switch (kind) {
   case SimpleTypeKind::Boolean128:
+  case SimpleTypeKind::Complex128:
   case SimpleTypeKind::Int128:
+  case SimpleTypeKind::Int128Oct:
   case SimpleTypeKind::UInt128:
+  case SimpleTypeKind::UInt128Oct:
   case SimpleTypeKind::Float128:
     return 16;
   case SimpleTypeKind::Complex80:
@@ -1008,10 +1024,15 @@ size_t lldb_private::npdb::GetTypeSizeForSimpleKind(SimpleTypeKind kind) {
   case SimpleTypeKind::Int64:
   case SimpleTypeKind::Int64Quad:
     return 8;
+  case SimpleTypeKind::Complex48:
+  case SimpleTypeKind::Float48:
+    return 6;
   case SimpleTypeKind::Boolean32:
   case SimpleTypeKind::Character32:
   case SimpleTypeKind::Complex32:
+  case SimpleTypeKind::Complex32PartialPrecision:
   case SimpleTypeKind::Float32:
+  case SimpleTypeKind::Float32PartialPrecision:
   case SimpleTypeKind::Int32:
   case SimpleTypeKind::Int32Long:
   case SimpleTypeKind::UInt32Long:
@@ -1020,6 +1041,7 @@ size_t lldb_private::npdb::GetTypeSizeForSimpleKind(SimpleTypeKind kind) {
     return 4;
   case SimpleTypeKind::Boolean16:
   case SimpleTypeKind::Character16:
+  case SimpleTypeKind::Complex16:
   case SimpleTypeKind::Float16:
   case SimpleTypeKind::Int16:
   case SimpleTypeKind::Int16Short:
@@ -1035,10 +1057,13 @@ size_t lldb_private::npdb::GetTypeSizeForSimpleKind(SimpleTypeKind kind) {
   case SimpleTypeKind::SByte:
   case SimpleTypeKind::Character8:
     return 1;
+
   case SimpleTypeKind::Void:
-  default:
+  case SimpleTypeKind::None:
+  case SimpleTypeKind::NotTranslated:
     return 0;
   }
+  return 0;
 }
 
 PdbTypeSymId lldb_private::npdb::GetBestPossibleDecl(PdbTypeSymId id,
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 7e275f1..ecd3188 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -152,14 +152,24 @@ static bool IsFunctionEpilogue(const CompilandIndexItem &cci,
   return false;
 }
 
+// See llvm::codeview::TypeIndex::simpleTypeName as well as strForPrimitiveTi
+// from the original pdbdump:
+// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/pdbdump/pdbdump.cpp#L1896-L1974
+//
+// For 64bit integers we use "long long" like DIA instead of "__int64".
 static llvm::StringRef GetSimpleTypeName(SimpleTypeKind kind) {
   switch (kind) {
   case SimpleTypeKind::Boolean128:
-  case SimpleTypeKind::Boolean16:
-  case SimpleTypeKind::Boolean32:
+    return "__bool128";
   case SimpleTypeKind::Boolean64:
+    return "__bool64";
+  case SimpleTypeKind::Boolean32:
+    return "__bool32";
+  case SimpleTypeKind::Boolean16:
+    return "__bool16";
   case SimpleTypeKind::Boolean8:
     return "bool";
+
   case SimpleTypeKind::Byte:
   case SimpleTypeKind::UnsignedCharacter:
     return "unsigned char";
@@ -168,57 +178,81 @@ static llvm::StringRef GetSimpleTypeName(SimpleTypeKind kind) {
   case SimpleTypeKind::SignedCharacter:
   case SimpleTypeKind::SByte:
     return "signed char";
-  case SimpleTypeKind::Character16:
-    return "char16_t";
   case SimpleTypeKind::Character32:
     return "char32_t";
+  case SimpleTypeKind::Character16:
+    return "char16_t";
   case SimpleTypeKind::Character8:
     return "char8_t";
+
+  case SimpleTypeKind::Complex128:
+    return "_Complex __float128";
   case SimpleTypeKind::Complex80:
+    return "_Complex long double";
   case SimpleTypeKind::Complex64:
+    return "_Complex double";
+  case SimpleTypeKind::Complex48:
+    return "_Complex __float48";
   case SimpleTypeKind::Complex32:
-    return "complex";
+  case SimpleTypeKind::Complex32PartialPrecision:
+    return "_Complex float";
+  case SimpleTypeKind::Complex16:
+    return "_Complex _Float16";
+
   case SimpleTypeKind::Float128:
+    return "__float128";
   case SimpleTypeKind::Float80:
     return "long double";
   case SimpleTypeKind::Float64:
     return "double";
+  case SimpleTypeKind::Float48:
+    return "__float48";
   case SimpleTypeKind::Float32:
+  case SimpleTypeKind::Float32PartialPrecision:
     return "float";
   case SimpleTypeKind::Float16:
-    return "single";
+    return "_Float16";
+
+  case SimpleTypeKind::Int128Oct:
   case SimpleTypeKind::Int128:
     return "__int128";
   case SimpleTypeKind::Int64:
   case SimpleTypeKind::Int64Quad:
-    return "int64_t";
+    return "long long";
+  case SimpleTypeKind::Int32Long:
+    return "long";
   case SimpleTypeKind::Int32:
     return "int";
   case SimpleTypeKind::Int16:
+  case SimpleTypeKind::Int16Short:
     return "short";
+
+  case SimpleTypeKind::UInt128Oct:
   case SimpleTypeKind::UInt128:
     return "unsigned __int128";
   case SimpleTypeKind::UInt64:
   case SimpleTypeKind::UInt64Quad:
-    return "uint64_t";
-  case SimpleTypeKind::HResult:
-    return "HRESULT";
+    return "unsigned long long";
   case SimpleTypeKind::UInt32:
     return "unsigned";
   case SimpleTypeKind::UInt16:
   case SimpleTypeKind::UInt16Short:
     return "unsigned short";
-  case SimpleTypeKind::Int32Long:
-    return "long";
   case SimpleTypeKind::UInt32Long:
     return "unsigned long";
+
+  case SimpleTypeKind::HResult:
+    return "HRESULT";
   case SimpleTypeKind::Void:
     return "void";
   case SimpleTypeKind::WideCharacter:
     return "wchar_t";
-  default:
+
+  case SimpleTypeKind::None:
+  case SimpleTypeKind::NotTranslated:
     return "";
   }
+  return "";
 }
 
 static bool IsClassRecord(TypeLeafKind kind) {
@@ -598,8 +632,8 @@ lldb::TypeSP SymbolFileNativePDB::CreateSimpleType(TypeIndex ti,
   uint64_t uid = toOpaqueUid(PdbTypeSymId(ti, false));
   if (ti == TypeIndex::NullptrT()) {
     Declaration decl;
-    return MakeType(uid, ConstString("std::nullptr_t"), 0, nullptr,
-                    LLDB_INVALID_UID, Type::eEncodingIsUID, decl, ct,
+    return MakeType(uid, ConstString("decltype(nullptr)"), std::nullopt,
+                    nullptr, LLDB_INVALID_UID, Type::eEncodingIsUID, decl, ct,
                     Type::ResolveState::Full);
   }
 
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 12cabff..82dfe7e 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -2629,7 +2629,7 @@ TypeSystemClang::GetDeclContextForType(clang::QualType type) {
   case clang::Type::Enum:
   case clang::Type::Record:
     return llvm::cast<clang::TagType>(qual_type)
-        ->getOriginalDecl()
+        ->getDecl()
         ->getDefinitionOrSelf();
   default:
     break;
@@ -2879,8 +2879,7 @@ bool TypeSystemClang::IsAnonymousType(lldb::opaque_compiler_type_t type) {
     if (const clang::RecordType *record_type =
             llvm::dyn_cast_or_null<clang::RecordType>(
                 qual_type.getTypePtrOrNull())) {
-      if (const clang::RecordDecl *record_decl =
-              record_type->getOriginalDecl()) {
+      if (const clang::RecordDecl *record_decl = record_type->getDecl()) {
         return record_decl->isAnonymousStructOrUnion();
       }
     }
@@ -3121,7 +3120,7 @@ TypeSystemClang::IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
           llvm::cast<clang::RecordType>(qual_type.getTypePtr());
       if (record_type) {
         if (const clang::RecordDecl *record_decl =
-                record_type->getOriginalDecl()->getDefinition()) {
+                record_type->getDecl()->getDefinition()) {
           // We are looking for a structure that contains only floating point
           // types
           clang::RecordDecl::field_iterator field_pos,
@@ -3301,7 +3300,7 @@ bool TypeSystemClang::IsEnumerationType(lldb::opaque_compiler_type_t type,
         GetCanonicalQualType(type)->getCanonicalTypeInternal());
 
     if (enum_type) {
-      IsIntegerType(enum_type->getOriginalDecl()
+      IsIntegerType(enum_type->getDecl()
                         ->getDefinitionOrSelf()
                         ->getIntegerType()
                         .getAsOpaquePtr(),
@@ -3529,7 +3528,7 @@ bool TypeSystemClang::IsDefined(lldb::opaque_compiler_type_t type) {
   const clang::TagType *tag_type =
       llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr());
   if (tag_type) {
-    if (clang::TagDecl *tag_decl = tag_type->getOriginalDecl()->getDefinition())
+    if (clang::TagDecl *tag_decl = tag_type->getDecl()->getDefinition())
       return tag_decl->isCompleteDefinition();
     return false;
   } else {
@@ -3782,7 +3781,7 @@ bool TypeSystemClang::IsBeingDefined(lldb::opaque_compiler_type_t type) {
   clang::QualType qual_type(GetCanonicalQualType(type));
   const clang::TagType *tag_type = llvm::dyn_cast<clang::TagType>(qual_type);
   if (tag_type)
-    return tag_type->getOriginalDecl()->isEntityBeingDefined();
+    return tag_type->getDecl()->isEntityBeingDefined();
   return false;
 }
 
@@ -3988,7 +3987,7 @@ TypeSystemClang::GetTypeInfo(lldb::opaque_compiler_type_t type,
     if (pointee_or_element_clang_type)
       pointee_or_element_clang_type->SetCompilerType(
           weak_from_this(), llvm::cast<clang::EnumType>(qual_type)
-                                ->getOriginalDecl()
+                                ->getDecl()
                                 ->getDefinitionOrSelf()
                                 ->getIntegerType()
                                 .getAsOpaquePtr());
@@ -4228,7 +4227,7 @@ TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) {
   case clang::Type::Record: {
     const clang::RecordType *record_type =
         llvm::cast<clang::RecordType>(qual_type.getTypePtr());
-    const clang::RecordDecl *record_decl = record_type->getOriginalDecl();
+    const clang::RecordDecl *record_decl = record_type->getDecl();
     if (record_decl->isUnion())
       return lldb::eTypeClassUnion;
     else if (record_decl->isStruct())
@@ -4704,9 +4703,9 @@ CompilerType TypeSystemClang::CreateTypedef(
     clang::TagDecl *tdecl = nullptr;
     if (!qual_type.isNull()) {
       if (const clang::RecordType *rt = qual_type->getAs<clang::RecordType>())
-        tdecl = rt->getOriginalDecl();
+        tdecl = rt->getDecl();
       if (const clang::EnumType *et = qual_type->getAs<clang::EnumType>())
-        tdecl = et->getOriginalDecl();
+        tdecl = et->getDecl();
     }
 
     // Check whether this declaration is an anonymous struct, union, or enum,
@@ -5386,7 +5385,7 @@ TypeSystemClang::GetNumChildren(lldb::opaque_compiler_type_t type,
       const clang::RecordType *record_type =
           llvm::cast<clang::RecordType>(qual_type.getTypePtr());
       const clang::RecordDecl *record_decl =
-          record_type->getOriginalDecl()->getDefinitionOrSelf();
+          record_type->getDecl()->getDefinitionOrSelf();
       const clang::CXXRecordDecl *cxx_record_decl =
           llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
 
@@ -5583,7 +5582,7 @@ void TypeSystemClang::ForEachEnumerator(
       llvm::dyn_cast<clang::EnumType>(GetCanonicalQualType(type));
   if (enum_type) {
     const clang::EnumDecl *enum_decl =
-        enum_type->getOriginalDecl()->getDefinitionOrSelf();
+        enum_type->getDecl()->getDefinitionOrSelf();
     if (enum_decl) {
       CompilerType integer_type = GetType(enum_decl->getIntegerType());
 
@@ -5615,7 +5614,7 @@ uint32_t TypeSystemClang::GetNumFields(lldb::opaque_compiler_type_t type) {
           llvm::dyn_cast<clang::RecordType>(qual_type.getTypePtr());
       if (record_type) {
         clang::RecordDecl *record_decl =
-            record_type->getOriginalDecl()->getDefinition();
+            record_type->getDecl()->getDefinition();
         if (record_decl) {
           count = std::distance(record_decl->field_begin(),
                                 record_decl->field_end());
@@ -5730,7 +5729,7 @@ CompilerType TypeSystemClang::GetFieldAtIndex(lldb::opaque_compiler_type_t type,
       const clang::RecordType *record_type =
           llvm::cast<clang::RecordType>(qual_type.getTypePtr());
       const clang::RecordDecl *record_decl =
-          record_type->getOriginalDecl()->getDefinitionOrSelf();
+          record_type->getDecl()->getDefinitionOrSelf();
       uint32_t field_idx = 0;
       clang::RecordDecl::field_iterator field, field_end;
       for (field = record_decl->field_begin(),
@@ -5916,7 +5915,7 @@ CompilerType TypeSystemClang::GetDirectBaseClassAtIndex(
                   llvm::cast<clang::CXXRecordDecl>(
                       base_class->getType()
                           ->castAs<clang::RecordType>()
-                          ->getOriginalDecl());
+                          ->getDecl());
               if (base_class->isVirtual())
                 *bit_offset_ptr =
                     record_layout.getVBaseClassOffset(base_class_decl)
@@ -6011,7 +6010,7 @@ CompilerType TypeSystemClang::GetVirtualBaseClassAtIndex(
                   llvm::cast<clang::CXXRecordDecl>(
                       base_class->getType()
                           ->castAs<clang::RecordType>()
-                          ->getOriginalDecl());
+                          ->getDecl());
               *bit_offset_ptr =
                   record_layout.getVBaseClassOffset(base_class_decl)
                       .getQuantity() *
@@ -6042,7 +6041,7 @@ TypeSystemClang::GetStaticFieldWithName(lldb::opaque_compiler_type_t type,
     const clang::RecordType *record_type =
         llvm::cast<clang::RecordType>(qual_type.getTypePtr());
     const clang::RecordDecl *record_decl =
-        record_type->getOriginalDecl()->getDefinitionOrSelf();
+        record_type->getDecl()->getDefinitionOrSelf();
 
     clang::DeclarationName decl_name(&getASTContext().Idents.get(name));
     for (NamedDecl *decl : record_decl->lookup(decl_name)) {
@@ -6271,7 +6270,7 @@ llvm::Expected<CompilerType> TypeSystemClang::GetChildCompilerTypeAtIndex(
       const clang::RecordType *record_type =
           llvm::cast<clang::RecordType>(parent_qual_type.getTypePtr());
       const clang::RecordDecl *record_decl =
-          record_type->getOriginalDecl()->getDefinitionOrSelf();
+          record_type->getDecl()->getDefinitionOrSelf();
       const clang::ASTRecordLayout &record_layout =
           getASTContext().getASTRecordLayout(record_decl);
       uint32_t child_idx = 0;
@@ -6292,7 +6291,7 @@ llvm::Expected<CompilerType> TypeSystemClang::GetChildCompilerTypeAtIndex(
             base_class_decl = llvm::cast<clang::CXXRecordDecl>(
                                   base_class->getType()
                                       ->getAs<clang::RecordType>()
-                                      ->getOriginalDecl())
+                                      ->getDecl())
                                   ->getDefinitionOrSelf();
             if (!TypeSystemClang::RecordHasFields(base_class_decl))
               continue;
@@ -6303,7 +6302,7 @@ llvm::Expected<CompilerType> TypeSystemClang::GetChildCompilerTypeAtIndex(
               base_class_decl = llvm::cast<clang::CXXRecordDecl>(
                                     base_class->getType()
                                         ->getAs<clang::RecordType>()
-                                        ->getOriginalDecl())
+                                        ->getDecl())
                                     ->getDefinitionOrSelf();
 
             if (base_class->isVirtual()) {
@@ -6766,7 +6765,7 @@ size_t TypeSystemClang::GetIndexOfChildMemberWithName(
         const clang::RecordType *record_type =
             llvm::cast<clang::RecordType>(qual_type.getTypePtr());
         const clang::RecordDecl *record_decl =
-            record_type->getOriginalDecl()->getDefinitionOrSelf();
+            record_type->getDecl()->getDefinitionOrSelf();
 
         assert(record_decl);
         uint32_t child_idx = 0;
@@ -6833,7 +6832,7 @@ size_t TypeSystemClang::GetIndexOfChildMemberWithName(
                   child_indexes.push_back(child_idx);
                   parent_record_decl = elem.Base->getType()
                                            ->castAs<clang::RecordType>()
-                                           ->getOriginalDecl()
+                                           ->getDecl()
                                            ->getDefinitionOrSelf();
                 }
               }
@@ -6969,7 +6968,7 @@ TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
         const clang::RecordType *record_type =
             llvm::cast<clang::RecordType>(qual_type.getTypePtr());
         const clang::RecordDecl *record_decl =
-            record_type->getOriginalDecl()->getDefinitionOrSelf();
+            record_type->getDecl()->getDefinitionOrSelf();
 
         assert(record_decl);
         uint32_t child_idx = 0;
@@ -6988,7 +6987,7 @@ TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
                 llvm::cast<clang::CXXRecordDecl>(
                     base_class->getType()
                         ->castAs<clang::RecordType>()
-                        ->getOriginalDecl())
+                        ->getDecl())
                     ->getDefinitionOrSelf();
             if (omit_empty_base_classes &&
                 !TypeSystemClang::RecordHasFields(base_class_decl))
@@ -7109,7 +7108,7 @@ TypeSystemClang::GetDirectNestedTypeWithName(lldb::opaque_compiler_type_t type,
     const clang::RecordType *record_type =
         llvm::cast<clang::RecordType>(qual_type.getTypePtr());
     const clang::RecordDecl *record_decl =
-        record_type->getOriginalDecl()->getDefinitionOrSelf();
+        record_type->getDecl()->getDefinitionOrSelf();
 
     clang::DeclarationName decl_name(&getASTContext().Idents.get(name));
     for (NamedDecl *decl : record_decl->lookup(decl_name)) {
@@ -7135,7 +7134,7 @@ bool TypeSystemClang::IsTemplateType(lldb::opaque_compiler_type_t type) {
   const clang::Type *clang_type = ClangUtil::GetQualType(ct).getTypePtr();
   if (auto *cxx_record_decl = dyn_cast<clang::TagType>(clang_type))
     return isa<clang::ClassTemplateSpecializationDecl>(
-        cxx_record_decl->getOriginalDecl());
+        cxx_record_decl->getDecl());
   return false;
 }
 
@@ -7338,7 +7337,7 @@ clang::EnumDecl *TypeSystemClang::GetAsEnumDecl(const CompilerType &type) {
   const clang::EnumType *enutype =
       llvm::dyn_cast<clang::EnumType>(ClangUtil::GetCanonicalQualType(type));
   if (enutype)
-    return enutype->getOriginalDecl()->getDefinitionOrSelf();
+    return enutype->getDecl()->getDefinitionOrSelf();
   return nullptr;
 }
 
@@ -7346,7 +7345,7 @@ clang::RecordDecl *TypeSystemClang::GetAsRecordDecl(const CompilerType &type) {
   const clang::RecordType *record_type =
       llvm::dyn_cast<clang::RecordType>(ClangUtil::GetCanonicalQualType(type));
   if (record_type)
-    return record_type->getOriginalDecl()->getDefinitionOrSelf();
+    return record_type->getDecl()->getDefinitionOrSelf();
   return nullptr;
 }
 
@@ -7428,7 +7427,7 @@ clang::FieldDecl *TypeSystemClang::AddFieldToRecordType(
       if (const clang::TagType *TagT =
               field->getType()->getAs<clang::TagType>()) {
         if (clang::RecordDecl *Rec =
-                llvm::dyn_cast<clang::RecordDecl>(TagT->getOriginalDecl()))
+                llvm::dyn_cast<clang::RecordDecl>(TagT->getDecl()))
           if (!Rec->getDeclName()) {
             Rec->setAnonymousStructOrUnion(true);
             field->setImplicit();
@@ -7514,7 +7513,7 @@ void TypeSystemClang::BuildIndirectFields(const CompilerType &type) {
         continue;
 
       clang::RecordDecl *field_record_decl =
-          field_record_type->getOriginalDecl()->getDefinition();
+          field_record_type->getDecl()->getDefinition();
 
       if (!field_record_decl)
         continue;
@@ -7656,8 +7655,7 @@ void TypeSystemClang::SetIntegerInitializerForVariable(
   // If the variable is an enum type, take the underlying integer type as
   // the type of the integer literal.
   if (const EnumType *enum_type = qt->getAs<EnumType>()) {
-    const EnumDecl *enum_decl =
-        enum_type->getOriginalDecl()->getDefinitionOrSelf();
+    const EnumDecl *enum_decl = enum_type->getDecl()->getDefinitionOrSelf();
     qt = enum_decl->getIntegerType();
   }
   // Bools are handled separately because the clang AST printer handles bools
@@ -8317,7 +8315,7 @@ bool TypeSystemClang::SetHasExternalStorage(lldb::opaque_compiler_type_t type,
 
   case clang::Type::Enum: {
     clang::EnumDecl *enum_decl =
-        llvm::cast<clang::EnumType>(qual_type)->getOriginalDecl();
+        llvm::cast<clang::EnumType>(qual_type)->getDecl();
     if (enum_decl) {
       enum_decl->setHasExternalLexicalStorage(has_extern);
       enum_decl->setHasExternalVisibleStorage(has_extern);
@@ -8355,7 +8353,7 @@ bool TypeSystemClang::StartTagDeclarationDefinition(const CompilerType &type) {
   if (!qual_type.isNull()) {
     const clang::TagType *tag_type = qual_type->getAs<clang::TagType>();
     if (tag_type) {
-      clang::TagDecl *tag_decl = tag_type->getOriginalDecl();
+      clang::TagDecl *tag_decl = tag_type->getDecl();
       if (tag_decl) {
         tag_decl->startDefinition();
         return true;
@@ -8390,8 +8388,7 @@ bool TypeSystemClang::CompleteTagDeclarationDefinition(
   // the definition.
   const clang::TagType *tag_type = qual_type->getAs<clang::TagType>();
   if (tag_type) {
-    clang::TagDecl *tag_decl =
-        tag_type->getOriginalDecl()->getDefinitionOrSelf();
+    clang::TagDecl *tag_decl = tag_type->getDecl()->getDefinitionOrSelf();
 
     if (auto *cxx_record_decl = llvm::dyn_cast<CXXRecordDecl>(tag_decl)) {
       // If we have a move constructor declared but no copy constructor we
@@ -8426,8 +8423,7 @@ bool TypeSystemClang::CompleteTagDeclarationDefinition(
 
   if (!enutype)
     return false;
-  clang::EnumDecl *enum_decl =
-      enutype->getOriginalDecl()->getDefinitionOrSelf();
+  clang::EnumDecl *enum_decl = enutype->getDecl()->getDefinitionOrSelf();
 
   if (enum_decl->isCompleteDefinition())
     return true;
@@ -8485,8 +8481,7 @@ clang::EnumConstantDecl *TypeSystemClang::AddEnumerationValueToEnumerationType(
   clang::EnumConstantDecl *enumerator_decl =
       clang::EnumConstantDecl::CreateDeserialized(getASTContext(),
                                                   GlobalDeclID());
-  clang::EnumDecl *enum_decl =
-      enutype->getOriginalDecl()->getDefinitionOrSelf();
+  clang::EnumDecl *enum_decl = enutype->getDecl()->getDefinitionOrSelf();
   enumerator_decl->setDeclContext(enum_decl);
   if (name && name[0])
     enumerator_decl->setDeclName(&getASTContext().Idents.get(name));
@@ -8521,8 +8516,7 @@ CompilerType TypeSystemClang::GetEnumerationIntegerType(CompilerType type) {
   if (!enum_type)
     return CompilerType();
 
-  return GetType(
-      enum_type->getOriginalDecl()->getDefinitionOrSelf()->getIntegerType());
+  return GetType(enum_type->getDecl()->getDefinitionOrSelf()->getIntegerType());
 }
 
 CompilerType
@@ -8630,8 +8624,7 @@ static bool DumpEnumValue(const clang::QualType &qual_type, Stream &s,
                           uint32_t bitfield_bit_size) {
   const clang::EnumType *enutype =
       llvm::cast<clang::EnumType>(qual_type.getTypePtr());
-  const clang::EnumDecl *enum_decl =
-      enutype->getOriginalDecl()->getDefinitionOrSelf();
+  const clang::EnumDecl *enum_decl = enutype->getDecl()->getDefinitionOrSelf();
   lldb::offset_t offset = byte_offset;
   bool qual_type_is_signed = qual_type->isSignedIntegerOrEnumerationType();
   const uint64_t enum_svalue =
@@ -8907,7 +8900,7 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type,
       GetCompleteType(type);
 
       auto *record_type = llvm::cast<clang::RecordType>(qual_type.getTypePtr());
-      const clang::RecordDecl *record_decl = record_type->getOriginalDecl();
+      const clang::RecordDecl *record_decl = record_type->getDecl();
       if (level == eDescriptionLevelVerbose)
         record_decl->dump(llvm_ostrm);
       else {
@@ -8919,7 +8912,7 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type,
     default: {
       if (auto *tag_type =
               llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr())) {
-        if (clang::TagDecl *tag_decl = tag_type->getOriginalDecl()) {
+        if (clang::TagDecl *tag_decl = tag_type->getDecl()) {
           if (level == eDescriptionLevelVerbose)
             tag_decl->dump(llvm_ostrm);
           else
@@ -8959,7 +8952,7 @@ void TypeSystemClang::DumpTypeName(const CompilerType &type) {
 
     case clang::Type::Enum: {
       clang::EnumDecl *enum_decl =
-          llvm::cast<clang::EnumType>(qual_type)->getOriginalDecl();
+          llvm::cast<clang::EnumType>(qual_type)->getDecl();
       if (enum_decl) {
         printf("enum %s", enum_decl->getName().str().c_str());
       }
@@ -9825,7 +9818,7 @@ bool TypeSystemClang::IsForcefullyCompleted(lldb::opaque_compiler_type_t type) {
         llvm::dyn_cast<clang::RecordType>(qual_type.getTypePtr());
     if (record_type) {
       const clang::RecordDecl *record_decl =
-          record_type->getOriginalDecl()->getDefinitionOrSelf();
+          record_type->getDecl()->getDefinitionOrSelf();
       if (std::optional<ClangASTMetadata> metadata = GetMetadata(record_decl))
         return metadata->IsForcefullyCompleted();
     }
diff --git a/lldb/source/Utility/XcodeSDK.cpp b/lldb/source/Utility/XcodeSDK.cpp
index 2040791..89e05de 100644
--- a/lldb/source/Utility/XcodeSDK.cpp
+++ b/lldb/source/Utility/XcodeSDK.cpp
@@ -38,8 +38,8 @@ static llvm::StringRef GetName(XcodeSDK::Type type) {
     return "XRSimulator";
   case XcodeSDK::XROS:
     return "XROS";
-  case XcodeSDK::bridgeOS:
-    return "bridgeOS";
+  case XcodeSDK::BridgeOS:
+    return "BridgeOS";
   case XcodeSDK::Linux:
     return "Linux";
   case XcodeSDK::unknown:
@@ -83,8 +83,8 @@ static XcodeSDK::Type ParseSDKName(llvm::StringRef &name) {
     return XcodeSDK::XRSimulator;
   if (name.consume_front("XROS"))
     return XcodeSDK::XROS;
-  if (name.consume_front("bridgeOS"))
-    return XcodeSDK::bridgeOS;
+  if (name.consume_front("BridgeOS"))
+    return XcodeSDK::BridgeOS;
   if (name.consume_front("Linux"))
     return XcodeSDK::Linux;
   static_assert(XcodeSDK::Linux == XcodeSDK::numSDKTypes - 1,
@@ -204,7 +204,7 @@ std::string XcodeSDK::GetCanonicalName(XcodeSDK::Info info) {
   case XROS:
     name = "xros";
     break;
-  case bridgeOS:
+  case BridgeOS:
     name = "bridgeos";
     break;
   case Linux:
diff --git a/lldb/test/API/macosx/debugserver-multimemread/Makefile b/lldb/test/API/macosx/debugserver-multimemread/Makefile
new file mode 100644
index 0000000..1049594
--- /dev/null
+++ b/lldb/test/API/macosx/debugserver-multimemread/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/macosx/debugserver-multimemread/TestDebugserverMultiMemRead.py b/lldb/test/API/macosx/debugserver-multimemread/TestDebugserverMultiMemRead.py
new file mode 100644
index 0000000..6caa5f8
--- /dev/null
+++ b/lldb/test/API/macosx/debugserver-multimemread/TestDebugserverMultiMemRead.py
@@ -0,0 +1,102 @@
+"""
+Tests debugserver support for MultiMemRead.
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+@skipUnlessDarwin
+@skipIfOutOfTreeDebugserver
+class TestCase(TestBase):
+    def send_process_packet(self, packet_str):
+        self.runCmd(f"proc plugin packet send {packet_str}", check=False)
+        # The output is of the form:
+        #  packet: <packet_str>
+        #  response: <response>
+        reply = self.res.GetOutput().split("\n")
+        packet = reply[0].strip()
+        response = reply[1].strip()
+
+        self.assertTrue(packet.startswith("packet: "))
+        self.assertTrue(response.startswith("response: "))
+        return response[len("response: ") :]
+
+    def check_invalid_packet(self, packet_str):
+        reply = self.send_process_packet("packet_str")
+        self.assertEqual(reply, "E03")
+
+    def test_packets(self):
+        self.build()
+        source_file = lldb.SBFileSpec("main.c")
+        target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(
+            self, "break here", source_file
+        )
+
+        reply = self.send_process_packet("qSupported")
+        self.assertIn("MultiMemRead+", reply)
+
+        mem_address_var = thread.frames[0].FindVariable("memory")
+        self.assertTrue(mem_address_var)
+        mem_address = mem_address_var.GetValueAsUnsigned() + 42
+
+        # no ":"
+        self.check_invalid_packet("MultiMemRead")
+        # missing ranges
+        self.check_invalid_packet("MultiMemRead:")
+        # needs at least one range
+        self.check_invalid_packet("MultiMemRead:ranges:")
+        # needs at least one range
+        self.check_invalid_packet("MultiMemRead:ranges:,")
+        # a range is a pair of numbers
+        self.check_invalid_packet("MultiMemRead:ranges:10")
+        # a range is a pair of numbers
+        self.check_invalid_packet("MultiMemRead:ranges:10,")
+        # range list must end with ;
+        self.check_invalid_packet("MultiMemRead:ranges:10,2")
+        self.check_invalid_packet("MultiMemRead:ranges:10,2,")
+        self.check_invalid_packet("MultiMemRead:ranges:10,2,3")
+        # ranges are pairs of numbers.
+        self.check_invalid_packet("MultiMemRead:ranges:10,2,3;")
+        # unrecognized field
+        self.check_invalid_packet("MultiMemRead:ranges:10,2;blah:;")
+        # unrecognized field
+        self.check_invalid_packet("MultiMemRead:blah:;ranges:10,2;")
+
+        # Zero-length reads are ok.
+        reply = self.send_process_packet("MultiMemRead:ranges:0,0;")
+        self.assertEqual(reply, "0;")
+
+        # Debugserver is permissive with trailing commas.
+        reply = self.send_process_packet("MultiMemRead:ranges:10,2,;")
+        self.assertEqual(reply, "0;")
+        reply = self.send_process_packet(f"MultiMemRead:ranges:{mem_address:x},2,;")
+        self.assertEqual(reply, "2;ab")
+
+        reply = self.send_process_packet("MultiMemRead:ranges:10,2;")
+        self.assertEqual(reply, "0;")
+        reply = self.send_process_packet(f"MultiMemRead:ranges:{mem_address:x},0;")
+        self.assertEqual(reply, "0;")
+        reply = self.send_process_packet(f"MultiMemRead:ranges:{mem_address:x},2;")
+        self.assertEqual(reply, "2;ab")
+        reply = self.send_process_packet(
+            f"MultiMemRead:ranges:{mem_address:x},2,{mem_address+2:x},4;"
+        )
+        self.assertEqual(reply, "2,4;abcdef")
+        reply = self.send_process_packet(
+            f"MultiMemRead:ranges:{mem_address:x},2,{mem_address+2:x},4,{mem_address+6:x},8;"
+        )
+        self.assertEqual(reply, "2,4,8;abcdefghijklmn")
+
+        # Test zero length in the middle.
+        reply = self.send_process_packet(
+            f"MultiMemRead:ranges:{mem_address:x},2,{mem_address+2:x},0,{mem_address+6:x},8;"
+        )
+        self.assertEqual(reply, "2,0,8;abghijklmn")
+        # Test zero length in the end.
+        reply = self.send_process_packet(
+            f"MultiMemRead:ranges:{mem_address:x},2,{mem_address+2:x},4,{mem_address+6:x},0;"
+        )
+        self.assertEqual(reply, "2,4,0;abcdef")
diff --git a/lldb/test/API/macosx/debugserver-multimemread/main.c b/lldb/test/API/macosx/debugserver-multimemread/main.c
new file mode 100644
index 0000000..44cdd47
--- /dev/null
+++ b/lldb/test/API/macosx/debugserver-multimemread/main.c
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char **argv) {
+  char *memory = malloc(1024);
+  memset(memory, '-', 1024);
+  // Write "interesting" characters at an offset from the memory filled with
+  // `-`. This way, if we read outside the range in either direction, we should
+  // find `-`s`.
+  int offset = 42;
+  for (int i = offset; i < offset + 14; i++)
+    memory[i] = 'a' + (i - offset);
+  return 0; // break here
+}
diff --git a/lldb/test/API/tools/lldb-dap/attach-commands/Makefile b/lldb/test/API/tools/lldb-dap/attach-commands/Makefile
new file mode 100644
index 0000000..1049594
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/attach-commands/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/tools/lldb-dap/attach-commands/TestDAP_attachCommands.py b/lldb/test/API/tools/lldb-dap/attach-commands/TestDAP_attachCommands.py
new file mode 100644
index 0000000..9e29f07
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/attach-commands/TestDAP_attachCommands.py
@@ -0,0 +1,145 @@
+"""
+Test lldb-dap attach commands
+"""
+
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+import lldbdap_testcase
+import time
+
+
+class TestDAP_attachCommands(lldbdap_testcase.DAPTestCaseBase):
+    @skipIfNetBSD  # Hangs on NetBSD as well
+    def test_commands(self):
+        """
+        Tests the "initCommands", "preRunCommands", "stopCommands",
+        "exitCommands", "terminateCommands" and "attachCommands"
+        that can be passed during attach.
+
+        "initCommands" are a list of LLDB commands that get executed
+        before the target is created.
+        "preRunCommands" are a list of LLDB commands that get executed
+        after the target has been created and before the launch.
+        "stopCommands" are a list of LLDB commands that get executed each
+        time the program stops.
+        "exitCommands" are a list of LLDB commands that get executed when
+        the process exits
+        "attachCommands" are a list of LLDB commands that get executed and
+        must have a valid process in the selected target in LLDB after
+        they are done executing. This allows custom commands to create any
+        kind of debug session.
+        "terminateCommands" are a list of LLDB commands that get executed when
+        the debugger session terminates.
+        """
+        program = self.build_and_create_debug_adapter_for_attach()
+
+        # Here we just create a target and launch the process as a way to test
+        # if we are able to use attach commands to create any kind of a target
+        # and use it for debugging
+        attachCommands = [
+            'target create -d "%s"' % (program),
+            "process launch --stop-at-entry",
+        ]
+        initCommands = ["target list", "platform list"]
+        preRunCommands = ["image list a.out", "image dump sections a.out"]
+        postRunCommands = ["help trace", "help process trace"]
+        stopCommands = ["frame variable", "thread backtrace"]
+        exitCommands = ["expr 2+3", "expr 3+4"]
+        terminateCommands = ["expr 4+2"]
+        self.attach(
+            program=program,
+            attachCommands=attachCommands,
+            initCommands=initCommands,
+            preRunCommands=preRunCommands,
+            stopCommands=stopCommands,
+            exitCommands=exitCommands,
+            terminateCommands=terminateCommands,
+            postRunCommands=postRunCommands,
+        )
+        # Get output from the console. This should contain both the
+        # "initCommands" and the "preRunCommands".
+        output = self.get_console()
+        # Verify all "initCommands" were found in console output
+        self.verify_commands("initCommands", output, initCommands)
+        # Verify all "preRunCommands" were found in console output
+        self.verify_commands("preRunCommands", output, preRunCommands)
+        # Verify all "postRunCommands" were found in console output
+        self.verify_commands("postRunCommands", output, postRunCommands)
+
+        functions = ["main"]
+        breakpoint_ids = self.set_function_breakpoints(functions)
+        self.assertEqual(len(breakpoint_ids), len(functions), "expect one breakpoint")
+        self.continue_to_breakpoints(breakpoint_ids)
+        output = self.collect_console(pattern=stopCommands[-1])
+        self.verify_commands("stopCommands", output, stopCommands)
+
+        # Continue after launch and hit the "pause()" call and stop the target.
+        # Get output from the console. This should contain both the
+        # "stopCommands" that were run after we stop.
+        self.do_continue()
+        time.sleep(0.5)
+        self.dap_server.request_pause()
+        self.dap_server.wait_for_stopped()
+        output = self.collect_console(pattern=stopCommands[-1])
+        self.verify_commands("stopCommands", output, stopCommands)
+
+        # Continue until the program exits
+        self.continue_to_exit()
+        # Get output from the console. This should contain both the
+        # "exitCommands" that were run after the second breakpoint was hit
+        # and the "terminateCommands" due to the debugging session ending
+        output = self.collect_console(
+            pattern=terminateCommands[0],
+        )
+        self.verify_commands("exitCommands", output, exitCommands)
+        self.verify_commands("terminateCommands", output, terminateCommands)
+
+    def test_attach_command_process_failures(self):
+        """
+        Tests that a 'attachCommands' is expected to leave the debugger's
+        selected target with a valid process.
+        """
+        program = self.build_and_create_debug_adapter_for_attach()
+        attachCommands = ['script print("oops, forgot to attach to a process...")']
+        resp = self.attach(
+            program=program,
+            attachCommands=attachCommands,
+            expectFailure=True,
+        )
+        self.assertFalse(resp["success"])
+        self.assertIn(
+            "attachCommands failed to attach to a process",
+            resp["body"]["error"]["format"],
+        )
+
+    @skipIfNetBSD  # Hangs on NetBSD as well
+    def test_terminate_commands(self):
+        """
+        Tests that the "terminateCommands", that can be passed during
+        attach, are run when the debugger is disconnected.
+        """
+        program = self.build_and_create_debug_adapter_for_attach()
+
+        # Here we just create a target and launch the process as a way to test
+        # if we are able to use attach commands to create any kind of a target
+        # and use it for debugging
+        attachCommands = [
+            'target create -d "%s"' % (program),
+            "process launch --stop-at-entry",
+        ]
+        terminateCommands = ["expr 4+2"]
+        self.attach(
+            program=program,
+            attachCommands=attachCommands,
+            terminateCommands=terminateCommands,
+            disconnectAutomatically=False,
+        )
+        self.get_console()
+        # Once it's disconnected the console should contain the
+        # "terminateCommands"
+        self.dap_server.request_disconnect(terminateDebuggee=True)
+        output = self.collect_console(
+            pattern=terminateCommands[0],
+        )
+        self.verify_commands("terminateCommands", output, terminateCommands)
diff --git a/lldb/test/API/tools/lldb-dap/attach-commands/main.c b/lldb/test/API/tools/lldb-dap/attach-commands/main.c
new file mode 100644
index 0000000..f56d5d5
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/attach-commands/main.c
@@ -0,0 +1,30 @@
+#include "attach.h"
+#include <stdio.h>
+#ifdef _WIN32
+#include <process.h>
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
+int main(int argc, char const *argv[]) {
+  lldb_enable_attach();
+
+  if (argc >= 2) {
+    // Create the synchronization token.
+    FILE *f = fopen(argv[1], "wx");
+    if (!f)
+      return 1;
+    fputs("\n", f);
+    fflush(f);
+    fclose(f);
+  }
+
+  printf("pid = %i\n", getpid());
+#ifdef _WIN32
+  Sleep(10 * 1000);
+#else
+  sleep(10);
+#endif
+  return 0; // breakpoint 1
+}
diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
index c54e21c..5331a9f 100644
--- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
@@ -11,50 +11,35 @@ import threading
 import time
 
 
-def spawn_and_wait(program, delay):
-    if delay:
-        time.sleep(delay)
-    process = subprocess.Popen(
-        [program], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-    )
-    process.wait()
-
-
 class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
-    def set_and_hit_breakpoint(self, continueToExit=True):
-        source = "main.c"
-        breakpoint1_line = line_number(source, "// breakpoint 1")
-        lines = [breakpoint1_line]
-        # Set breakpoint in the thread function so we can step the threads
-        breakpoint_ids = self.set_source_breakpoints(source, lines)
-        self.assertEqual(
-            len(breakpoint_ids), len(lines), "expect correct number of breakpoints"
-        )
-        # Test binary will sleep for 10s, offset the breakpoint timeout
-        # accordingly.
-        timeout_offset = 10
-        self.continue_to_breakpoints(
-            breakpoint_ids, timeout=timeout_offset + self.DEFAULT_TIMEOUT
+    def spawn(self, args):
+        self.process = subprocess.Popen(
+            args,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
-        if continueToExit:
-            self.continue_to_exit()
 
-    @skipIfNetBSD  # Hangs on NetBSD as well
+    def spawn_and_wait(self, program, delay):
+        time.sleep(delay)
+        self.spawn([program])
+        self.process.wait()
+
+    def continue_and_verify_pid(self):
+        self.do_continue()
+        out, _ = self.process.communicate("foo")
+        self.assertIn(f"pid = {self.process.pid}", out)
+
     def test_by_pid(self):
         """
         Tests attaching to a process by process ID.
         """
         program = self.build_and_create_debug_adapter_for_attach()
-        self.process = subprocess.Popen(
-            [program],
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
+        self.spawn([program])
         self.attach(pid=self.process.pid)
-        self.set_and_hit_breakpoint(continueToExit=True)
+        self.continue_and_verify_pid()
 
-    @skipIfNetBSD  # Hangs on NetBSD as well
     def test_by_name(self):
         """
         Tests attaching to a process by process name.
@@ -65,24 +50,20 @@ class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
         pid_file_path = lldbutil.append_to_process_working_directory(
             self, "pid_file_%d" % (int(time.time()))
         )
-
-        popen = self.spawnSubprocess(program, [pid_file_path])
+        self.spawn([program, pid_file_path])
         lldbutil.wait_for_file_on_target(self, pid_file_path)
 
         self.attach(program=program)
-        self.set_and_hit_breakpoint(continueToExit=True)
+        self.continue_and_verify_pid()
 
-    @skipUnlessDarwin
-    @skipIfNetBSD  # Hangs on NetBSD as well
     def test_by_name_waitFor(self):
         """
-        Tests attaching to a process by process name and waiting for the
-        next instance of a process to be launched, ignoring all current
-        ones.
+        Tests waiting for, and attaching to a process by process name that
+        doesn't exist yet.
         """
         program = self.build_and_create_debug_adapter_for_attach()
         self.spawn_thread = threading.Thread(
-            target=spawn_and_wait,
+            target=self.spawn_and_wait,
             args=(
                 program,
                 1.0,
@@ -90,140 +71,4 @@ class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
         )
         self.spawn_thread.start()
         self.attach(program=program, waitFor=True)
-        self.set_and_hit_breakpoint(continueToExit=True)
-
-    @skipIfNetBSD  # Hangs on NetBSD as well
-    def test_commands(self):
-        """
-        Tests the "initCommands", "preRunCommands", "stopCommands",
-        "exitCommands", "terminateCommands" and "attachCommands"
-        that can be passed during attach.
-
-        "initCommands" are a list of LLDB commands that get executed
-        before the target is created.
-        "preRunCommands" are a list of LLDB commands that get executed
-        after the target has been created and before the launch.
-        "stopCommands" are a list of LLDB commands that get executed each
-        time the program stops.
-        "exitCommands" are a list of LLDB commands that get executed when
-        the process exits
-        "attachCommands" are a list of LLDB commands that get executed and
-        must have a valid process in the selected target in LLDB after
-        they are done executing. This allows custom commands to create any
-        kind of debug session.
-        "terminateCommands" are a list of LLDB commands that get executed when
-        the debugger session terminates.
-        """
-        program = self.build_and_create_debug_adapter_for_attach()
-
-        # Here we just create a target and launch the process as a way to test
-        # if we are able to use attach commands to create any kind of a target
-        # and use it for debugging
-        attachCommands = [
-            'target create -d "%s"' % (program),
-            "process launch --stop-at-entry",
-        ]
-        initCommands = ["target list", "platform list"]
-        preRunCommands = ["image list a.out", "image dump sections a.out"]
-        postRunCommands = ["help trace", "help process trace"]
-        stopCommands = ["frame variable", "thread backtrace"]
-        exitCommands = ["expr 2+3", "expr 3+4"]
-        terminateCommands = ["expr 4+2"]
-        self.attach(
-            program=program,
-            attachCommands=attachCommands,
-            initCommands=initCommands,
-            preRunCommands=preRunCommands,
-            stopCommands=stopCommands,
-            exitCommands=exitCommands,
-            terminateCommands=terminateCommands,
-            postRunCommands=postRunCommands,
-        )
-        # Get output from the console. This should contain both the
-        # "initCommands" and the "preRunCommands".
-        output = self.get_console()
-        # Verify all "initCommands" were found in console output
-        self.verify_commands("initCommands", output, initCommands)
-        # Verify all "preRunCommands" were found in console output
-        self.verify_commands("preRunCommands", output, preRunCommands)
-        # Verify all "postRunCommands" were found in console output
-        self.verify_commands("postRunCommands", output, postRunCommands)
-
-        functions = ["main"]
-        breakpoint_ids = self.set_function_breakpoints(functions)
-        self.assertEqual(len(breakpoint_ids), len(functions), "expect one breakpoint")
-        self.continue_to_breakpoints(breakpoint_ids)
-        output = self.collect_console(timeout=10, pattern=stopCommands[-1])
-        self.verify_commands("stopCommands", output, stopCommands)
-
-        # Continue after launch and hit the "pause()" call and stop the target.
-        # Get output from the console. This should contain both the
-        # "stopCommands" that were run after we stop.
-        self.do_continue()
-        time.sleep(0.5)
-        self.dap_server.request_pause()
-        self.dap_server.wait_for_stopped()
-        output = self.collect_console(timeout=10, pattern=stopCommands[-1])
-        self.verify_commands("stopCommands", output, stopCommands)
-
-        # Continue until the program exits
-        self.continue_to_exit()
-        # Get output from the console. This should contain both the
-        # "exitCommands" that were run after the second breakpoint was hit
-        # and the "terminateCommands" due to the debugging session ending
-        output = self.collect_console(
-            timeout=10.0,
-            pattern=terminateCommands[0],
-        )
-        self.verify_commands("exitCommands", output, exitCommands)
-        self.verify_commands("terminateCommands", output, terminateCommands)
-
-    def test_attach_command_process_failures(self):
-        """
-        Tests that a 'attachCommands' is expected to leave the debugger's
-        selected target with a valid process.
-        """
-        program = self.build_and_create_debug_adapter_for_attach()
-        attachCommands = ['script print("oops, forgot to attach to a process...")']
-        resp = self.attach(
-            program=program,
-            attachCommands=attachCommands,
-            expectFailure=True,
-        )
-        self.assertFalse(resp["success"])
-        self.assertIn(
-            "attachCommands failed to attach to a process",
-            resp["body"]["error"]["format"],
-        )
-
-    @skipIfNetBSD  # Hangs on NetBSD as well
-    def test_terminate_commands(self):
-        """
-        Tests that the "terminateCommands", that can be passed during
-        attach, are run when the debugger is disconnected.
-        """
-        program = self.build_and_create_debug_adapter_for_attach()
-
-        # Here we just create a target and launch the process as a way to test
-        # if we are able to use attach commands to create any kind of a target
-        # and use it for debugging
-        attachCommands = [
-            'target create -d "%s"' % (program),
-            "process launch --stop-at-entry",
-        ]
-        terminateCommands = ["expr 4+2"]
-        self.attach(
-            program=program,
-            attachCommands=attachCommands,
-            terminateCommands=terminateCommands,
-            disconnectAutomatically=False,
-        )
-        self.get_console()
-        # Once it's disconnected the console should contain the
-        # "terminateCommands"
-        self.dap_server.request_disconnect(terminateDebuggee=True)
-        output = self.collect_console(
-            timeout=1.0,
-            pattern=terminateCommands[0],
-        )
-        self.verify_commands("terminateCommands", output, terminateCommands)
+        self.continue_and_verify_pid()
diff --git a/lldb/test/API/tools/lldb-dap/attach/main.c b/lldb/test/API/tools/lldb-dap/attach/main.c
index f56d5d5..e14cf71 100644
--- a/lldb/test/API/tools/lldb-dap/attach/main.c
+++ b/lldb/test/API/tools/lldb-dap/attach/main.c
@@ -2,7 +2,6 @@
 #include <stdio.h>
 #ifdef _WIN32
 #include <process.h>
-#include <windows.h>
 #else
 #include <unistd.h>
 #endif
@@ -20,11 +19,9 @@ int main(int argc, char const *argv[]) {
     fclose(f);
   }
 
+  // Wait on input from stdin.
+  getchar();
+
   printf("pid = %i\n", getpid());
-#ifdef _WIN32
-  Sleep(10 * 1000);
-#else
-  sleep(10);
-#endif
-  return 0; // breakpoint 1
+  return 0;
 }
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
index 151ad76..beab4d6 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
@@ -82,14 +82,14 @@ class TestDAP_breakpointEvents(lldbdap_testcase.DAPTestCaseBase):
             )
 
         # Flush the breakpoint events.
-        self.dap_server.wait_for_breakpoint_events(timeout=5)
+        self.dap_server.wait_for_breakpoint_events()
 
         # Continue to the breakpoint
         self.continue_to_breakpoints(dap_breakpoint_ids)
 
         verified_breakpoint_ids = []
         unverified_breakpoint_ids = []
-        for breakpoint_event in self.dap_server.wait_for_breakpoint_events(timeout=5):
+        for breakpoint_event in self.dap_server.wait_for_breakpoint_events():
             breakpoint = breakpoint_event["body"]["breakpoint"]
             id = breakpoint["id"]
             if breakpoint["verified"]:
diff --git a/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py b/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py
index e722fce..14789a6 100644
--- a/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py
+++ b/lldb/test/API/tools/lldb-dap/cancel/TestDAP_cancel.py
@@ -46,7 +46,7 @@ class TestDAP_cancel(lldbdap_testcase.DAPTestCaseBase):
 
         # Use a relatively short timeout since this is only to ensure the
         # following request is queued.
-        blocking_seq = self.async_blocking_request(duration=1.0)
+        blocking_seq = self.async_blocking_request(duration=self.DEFAULT_TIMEOUT / 10)
         # Use a longer timeout to ensure we catch if the request was interrupted
         # properly.
         pending_seq = self.async_blocking_request(duration=self.DEFAULT_TIMEOUT / 2)
diff --git a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py
index e61d248..f53813a 100644
--- a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py
+++ b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py
@@ -23,7 +23,6 @@ class TestDAP_commands(lldbdap_testcase.DAPTestCaseBase):
             exitCommands=["?" + command_quiet, command_not_quiet],
         )
         full_output = self.collect_console(
-            timeout=1.0,
             pattern=command_not_quiet,
         )
         self.assertNotIn(command_quiet, full_output)
@@ -51,7 +50,6 @@ class TestDAP_commands(lldbdap_testcase.DAPTestCaseBase):
             expectFailure=True,
         )
         full_output = self.collect_console(
-            timeout=1.0,
             pattern=command_abort_on_error,
         )
         self.assertNotIn(command_quiet, full_output)
diff --git a/lldb/test/API/tools/lldb-dap/io/TestDAP_io.py b/lldb/test/API/tools/lldb-dap/io/TestDAP_io.py
index af5c62a..9fbe9aa 100644
--- a/lldb/test/API/tools/lldb-dap/io/TestDAP_io.py
+++ b/lldb/test/API/tools/lldb-dap/io/TestDAP_io.py
@@ -44,7 +44,7 @@ class TestDAP_io(lldbdap_testcase.DAPTestCaseBase):
         """
         process = self.launch()
         process.stdin.close()
-        self.assertEqual(process.wait(timeout=5.0), EXIT_SUCCESS)
+        self.assertEqual(process.wait(timeout=self.DEFAULT_TIMEOUT), EXIT_SUCCESS)
 
     def test_invalid_header(self):
         """
@@ -54,7 +54,7 @@ class TestDAP_io(lldbdap_testcase.DAPTestCaseBase):
         process = self.launch()
         process.stdin.write(b"not the correct message header")
         process.stdin.close()
-        self.assertEqual(process.wait(timeout=5.0), EXIT_FAILURE)
+        self.assertEqual(process.wait(timeout=self.DEFAULT_TIMEOUT), EXIT_FAILURE)
 
     def test_partial_header(self):
         """
@@ -64,7 +64,7 @@ class TestDAP_io(lldbdap_testcase.DAPTestCaseBase):
         process = self.launch()
         process.stdin.write(b"Content-Length: ")
         process.stdin.close()
-        self.assertEqual(process.wait(timeout=5.0), EXIT_FAILURE)
+        self.assertEqual(process.wait(timeout=self.DEFAULT_TIMEOUT), EXIT_FAILURE)
 
     def test_incorrect_content_length(self):
         """
@@ -74,7 +74,7 @@ class TestDAP_io(lldbdap_testcase.DAPTestCaseBase):
         process = self.launch()
         process.stdin.write(b"Content-Length: abc")
         process.stdin.close()
-        self.assertEqual(process.wait(timeout=5.0), EXIT_FAILURE)
+        self.assertEqual(process.wait(timeout=self.DEFAULT_TIMEOUT), EXIT_FAILURE)
 
     def test_partial_content_length(self):
         """
@@ -84,4 +84,4 @@ class TestDAP_io(lldbdap_testcase.DAPTestCaseBase):
         process = self.launch()
         process.stdin.write(b"Content-Length: 10\r\n\r\n{")
         process.stdin.close()
-        self.assertEqual(process.wait(timeout=5.0), EXIT_FAILURE)
+        self.assertEqual(process.wait(timeout=self.DEFAULT_TIMEOUT), EXIT_FAILURE)
diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
index ceef95df..8db2316 100644
--- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
+++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
@@ -632,7 +632,27 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
         program = self.getBuildArtifact("a.out")
 
         with tempfile.NamedTemporaryFile("rt") as f:
-            self.launch(program, stdio=[None, f.name, None])
+            self.launch(program, stdio=[None, f.name])
+            self.continue_to_exit()
+            lines = f.readlines()
+            self.assertIn(
+                program, lines[0], "make sure program path is in first argument"
+            )
+
+    @skipIfAsan
+    @skipIfWindows
+    @skipIf(oslist=["linux"], archs=no_match(["x86_64"]))
+    def test_stdio_redirection_and_console(self):
+        """
+        Test stdio redirection and console.
+        """
+        self.build_and_create_debug_adapter()
+        program = self.getBuildArtifact("a.out")
+
+        with tempfile.NamedTemporaryFile("rt") as f:
+            self.launch(
+                program, console="integratedTerminal", stdio=[None, f.name, None]
+            )
             self.continue_to_exit()
             lines = f.readlines()
             self.assertIn(
diff --git a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py
index bb835af..1f4afab 100644
--- a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py
+++ b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py
@@ -23,15 +23,15 @@ class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase):
         self.continue_to_breakpoints(breakpoint_ids)
 
         # We're now stopped at breakpoint 1 before the dlopen. Flush all the module events.
-        event = self.dap_server.wait_for_event(["module"], 0.25)
+        event = self.dap_server.wait_for_event(["module"])
         while event is not None:
-            event = self.dap_server.wait_for_event(["module"], 0.25)
+            event = self.dap_server.wait_for_event(["module"])
 
         # Continue to the second breakpoint, before the dlclose.
         self.continue_to_breakpoints(breakpoint_ids)
 
         # Make sure we got a module event for libother.
-        event = self.dap_server.wait_for_event(["module"], 5)
+        event = self.dap_server.wait_for_event(["module"])
         self.assertIsNotNone(event, "didn't get a module event")
         module_name = event["body"]["module"]["name"]
         module_id = event["body"]["module"]["id"]
@@ -42,7 +42,7 @@ class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase):
         self.continue_to_breakpoints(breakpoint_ids)
 
         # Make sure we got a module event for libother.
-        event = self.dap_server.wait_for_event(["module"], 5)
+        event = self.dap_server.wait_for_event(["module"])
         self.assertIsNotNone(event, "didn't get a module event")
         reason = event["body"]["reason"]
         self.assertEqual(reason, "removed")
@@ -55,8 +55,4 @@ class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase):
         self.assertListEqual(list(module_data.keys()), required_keys)
         self.assertEqual(module_data["name"], "", "expects empty name.")
 
-        # Make sure we do not send another event
-        event = self.dap_server.wait_for_event(["module"], 3)
-        self.assertIsNone(event, "expects no events.")
-
         self.continue_to_exit()
diff --git a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
index c5a6837..0ed53da 100644
--- a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
+++ b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
@@ -67,7 +67,7 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase):
         # Collect all the module names we saw as events.
         module_new_names = []
         module_changed_names = []
-        module_event = self.dap_server.wait_for_event(["module"], 1)
+        module_event = self.dap_server.wait_for_event(["module"])
         while module_event is not None:
             reason = module_event["body"]["reason"]
             if reason == "new":
@@ -75,7 +75,7 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase):
             elif reason == "changed":
                 module_changed_names.append(module_event["body"]["module"]["name"])
 
-            module_event = self.dap_server.wait_for_event(["module"], 1)
+            module_event = self.dap_server.wait_for_event(["module"])
 
         # Make sure we got an event for every active module.
         self.assertNotEqual(len(module_new_names), 0)
diff --git a/lldb/test/API/tools/lldb-dap/output/TestDAP_output.py b/lldb/test/API/tools/lldb-dap/output/TestDAP_output.py
index fe978a9..0065258 100644
--- a/lldb/test/API/tools/lldb-dap/output/TestDAP_output.py
+++ b/lldb/test/API/tools/lldb-dap/output/TestDAP_output.py
@@ -29,7 +29,7 @@ class TestDAP_output(lldbdap_testcase.DAPTestCaseBase):
         self.continue_to_breakpoints(breakpoint_ids)
 
         # Ensure partial messages are still sent.
-        output = self.collect_stdout(timeout=1.0, pattern="abcdef")
+        output = self.collect_stdout(pattern="abcdef")
         self.assertTrue(output and len(output) > 0, "expect program stdout")
 
         self.continue_to_exit()
diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py
index 6748379..e1ad142 100644
--- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py
+++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py
@@ -105,7 +105,7 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase):
         # Restart and check that we still get a stopped event before reaching
         # main.
         self.dap_server.request_restart()
-        stopped_events = self.dap_server.wait_for_stopped(timeout=20)
+        stopped_events = self.dap_server.wait_for_stopped()
         self.verify_stopped_on_entry(stopped_events)
 
         # continue to main
diff --git a/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py b/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py
index 08c225b..6008a0c 100644
--- a/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py
+++ b/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py
@@ -29,7 +29,7 @@ class TestDAP_stackTraceMissingSourcePath(lldbdap_testcase.DAPTestCaseBase):
         """
         Build the program and run until the breakpoint is hit, and return the stack frames.
         """
-        other_source_file = "other.c"
+        other_source_file = self.getBuildArtifact("other.c")
         with delete_file_on_exit(other_source_file):
             with open(other_source_file, "w") as f:
                 f.write(OTHER_C_SOURCE_CODE)
@@ -169,3 +169,4 @@ class TestDAP_stackTraceMissingSourcePath(lldbdap_testcase.DAPTestCaseBase):
         self.verify_frames_source(
             frames, main_frame_assembly=False, other_frame_assembly=False
         )
+        self.continue_to_exit()
diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
index 13a69460..977d6ce 100644
--- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
+++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
@@ -65,6 +65,11 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
                 self.assertNotIn(
                     key, actual, 'key "%s" is not expected in %s' % (key, actual)
                 )
+        isReadOnly = verify_dict.get("readOnly", False)
+        attributes = actual.get("presentationHint", {}).get("attributes", [])
+        self.assertEqual(
+            isReadOnly, "readOnly" in attributes, "%s %s" % (verify_dict, actual)
+        )
         hasVariablesReference = "variablesReference" in actual
         varRef = None
         if hasVariablesReference:
@@ -179,8 +184,9 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
                 "children": {
                     "x": {"equals": {"type": "int", "value": "11"}},
                     "y": {"equals": {"type": "int", "value": "22"}},
-                    "buffer": {"children": buffer_children},
+                    "buffer": {"children": buffer_children, "readOnly": True},
                 },
+                "readOnly": True,
             },
             "x": {"equals": {"type": "int"}},
         }
@@ -444,8 +450,10 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
                     "buffer": {
                         "children": buffer_children,
                         "equals": {"indexedVariables": 16},
+                        "readOnly": True,
                     },
                 },
+                "readOnly": True,
             },
             "x": {
                 "equals": {"type": "int"},
@@ -528,7 +536,7 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
             "children": {
                 "x": {"equals": {"type": "int", "value": "11"}},
                 "y": {"equals": {"type": "int", "value": "22"}},
-                "buffer": {"children": buffer_children},
+                "buffer": {"children": buffer_children, "readOnly": True},
             },
         }
 
@@ -622,11 +630,17 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
         # "[raw]" child.
         raw_child_count = 1 if enableSyntheticChildDebugging else 0
         verify_locals = {
-            "small_array": {"equals": {"indexedVariables": 5}},
-            "large_array": {"equals": {"indexedVariables": 200}},
-            "small_vector": {"equals": {"indexedVariables": 5 + raw_child_count}},
-            "large_vector": {"equals": {"indexedVariables": 200 + raw_child_count}},
-            "pt": {"missing": ["indexedVariables"]},
+            "small_array": {"equals": {"indexedVariables": 5}, "readOnly": True},
+            "large_array": {"equals": {"indexedVariables": 200}, "readOnly": True},
+            "small_vector": {
+                "equals": {"indexedVariables": 5 + raw_child_count},
+                "readOnly": True,
+            },
+            "large_vector": {
+                "equals": {"indexedVariables": 200 + raw_child_count},
+                "readOnly": True,
+            },
+            "pt": {"missing": ["indexedVariables"], "readOnly": True},
         }
         self.verify_variables(verify_locals, locals)
 
@@ -640,7 +654,10 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
             "[4]": {"equals": {"type": "int", "value": "0"}},
         }
         if enableSyntheticChildDebugging:
-            verify_children["[raw]"] = ({"contains": {"type": ["vector"]}},)
+            verify_children["[raw]"] = {
+                "contains": {"type": ["vector"]},
+                "readOnly": True,
+            }
 
         children = self.dap_server.request_variables(locals[2]["variablesReference"])[
             "body"
@@ -660,7 +677,7 @@ class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
             return_name: {"equals": {"type": "int", "value": "300"}},
             "argc": {},
             "argv": {},
-            "pt": {},
+            "pt": {"readOnly": True},
             "x": {},
             "return_result": {"equals": {"type": "int"}},
         }
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s b/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s
index fe2f397..b44b99a 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s
+++ b/lldb/test/Shell/SymbolFile/NativePDB/local-variables-registers.s
@@ -578,12 +578,12 @@ main:                                   # @main
 # CHECK:      (lldb) image lookup -a 0x14000104e -v
 # CHECK:          LineEntry: [0x000000014000104e-0x0000000140001050): C:\src\test\a.cpp:1004
 # CHECK-NEXT:        Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main"
-# CHECK-NEXT:      Variable: id = {{.*}}, name = "simple_type1", type = "int64_t", valid ranges = <block>, location = [0x000000014000104e, 0x000000014000104f) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_reg24 XMM7, DW_OP_piece 0x4
+# CHECK-NEXT:      Variable: id = {{.*}}, name = "simple_type1", type = "long long", valid ranges = <block>, location = [0x000000014000104e, 0x000000014000104f) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_reg24 XMM7, DW_OP_piece 0x4
 # CHECK-EMPTY:
 # CHECK:      (lldb) image lookup -a 0x14000104f -v
 # CHECK:          LineEntry: [0x000000014000104e-0x0000000140001050): C:\src\test\a.cpp:1004
 # CHECK-NEXT:        Symbol: id = {{.*}}, range = [0x0000000140001011-0x0000000140001050), name="main"
-# CHECK-NEXT:      Variable: id = {{.*}}, name = "simple_type1", type = "int64_t", valid ranges = <block>, location = [0x000000014000104f, 0x0000000140001050) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_piece 0x4
+# CHECK-NEXT:      Variable: id = {{.*}}, name = "simple_type1", type = "long long", valid ranges = <block>, location = [0x000000014000104f, 0x0000000140001050) -> DW_OP_reg26 XMM9, DW_OP_piece 0x4, DW_OP_piece 0x4
 # CHECK-EMPTY:
 
 .Ltmp26:
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/simple-types.cpp b/lldb/test/Shell/SymbolFile/NativePDB/simple-types.cpp
index 403cd29..3781194 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/simple-types.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/simple-types.cpp
@@ -58,20 +58,28 @@ int main() {
 
   MyStruct my_struct;
 
+  _Float16 f16;
+
+  _Complex float cf;
+  _Complex double cd;
+
+  __int128 i128;
+  unsigned __int128 ui128;
+
   decltype(nullptr) np;
 }
 
-// CHECK-DAG: Type{{.*}} , name = "std::nullptr_t", size = 0, compiler_type = 0x{{[0-9a-f]+}} nullptr_t
+// CHECK-DAG: Type{{.*}} , name = "decltype(nullptr)", compiler_type = 0x{{[0-9a-f]+}} nullptr_t
 
 // CHECK-DAG: Type{{.*}} , name = "bool", size = 1, compiler_type = 0x{{[0-9a-f]+}} _Bool
 // CHECK-DAG: Type{{.*}} , name = "char", size = 1, compiler_type = 0x{{[0-9a-f]+}} char
 // CHECK-DAG: Type{{.*}} , name = "unsigned char", size = 1, compiler_type = 0x{{[0-9a-f]+}} unsigned char
 // CHECK-DAG: Type{{.*}} , name = "char8_t", size = 1, compiler_type = 0x{{[0-9a-f]+}} char8_t
 
-// CHECK-DAG: Type{{.*}} , size = 2, compiler_type = 0x{{[0-9a-f]+}} short
-// CHECK-DAG: Type{{.*}} , name = "const volatile ", size = 2, compiler_type = 0x{{[0-9a-f]+}} const volatile short
-// CHECK-DAG: Type{{.*}} , name = "const ", size = 2, compiler_type = 0x{{[0-9a-f]+}} const short
-// CHECK-DAG: Type{{.*}} , name = "volatile ", size = 2, compiler_type = 0x{{[0-9a-f]+}} volatile short
+// CHECK-DAG: Type{{.*}} , name = "short", size = 2, compiler_type = 0x{{[0-9a-f]+}} short
+// CHECK-DAG: Type{{.*}} , name = "const volatile short", size = 2, compiler_type = 0x{{[0-9a-f]+}} const volatile short
+// CHECK-DAG: Type{{.*}} , name = "const short", size = 2, compiler_type = 0x{{[0-9a-f]+}} const short
+// CHECK-DAG: Type{{.*}} , name = "volatile short", size = 2, compiler_type = 0x{{[0-9a-f]+}} volatile short
 
 // CHECK-DAG: Type{{.*}} , name = "unsigned short", size = 2, compiler_type = 0x{{[0-9a-f]+}} unsigned short
 // CHECK-DAG: Type{{.*}} , name = "wchar_t", size = 2, compiler_type = 0x{{[0-9a-f]+}} wchar_t
@@ -83,12 +91,19 @@ int main() {
 // CHECK-DAG: Type{{.*}} , name = "unsigned long", size = 4, compiler_type = 0x{{[0-9a-f]+}} unsigned long
 // CHECK-DAG: Type{{.*}} , name = "char32_t", size = 4, compiler_type = 0x{{[0-9a-f]+}} char32_t
 
-// CHECK-DAG: Type{{.*}} , name = "int64_t", size = 8, compiler_type = 0x{{[0-9a-f]+}} long long
-// CHECK-DAG: Type{{.*}} , name = "uint64_t", size = 8, compiler_type = 0x{{[0-9a-f]+}} unsigned long long
+// CHECK-DAG: Type{{.*}} , name = "long long", size = 8, compiler_type = 0x{{[0-9a-f]+}} long long
+// CHECK-DAG: Type{{.*}} , name = "unsigned long long", size = 8, compiler_type = 0x{{[0-9a-f]+}} unsigned long long
 
+// CHECK-DAG: Type{{.*}} , name = "__int128", size = 16, compiler_type = 0x{{[0-9a-f]+}} __int128
+// CHECK-DAG: Type{{.*}} , name = "unsigned __int128", size = 16, compiler_type = 0x{{[0-9a-f]+}} unsigned __int128
+
+// CHECK-DAG: Type{{.*}} , name = "_Float16", size = 2, compiler_type = 0x{{[0-9a-f]+}} __fp16
 // CHECK-DAG: Type{{.*}} , name = "float", size = 4, compiler_type = 0x{{[0-9a-f]+}} float
 // CHECK-DAG: Type{{.*}} , name = "const float", size = 4, compiler_type = 0x{{[0-9a-f]+}} const float
 
+// CHECK-DAG: Type{{.*}} , name = "_Complex float", size = 4, compiler_type = 0x{{[0-9a-f]+}} _Complex float
+// CHECK-DAG: Type{{.*}} , name = "_Complex double", size = 8, compiler_type = 0x{{[0-9a-f]+}} _Complex double
+
 // CHECK-DAG:  Type{{.*}} , name = "ReturnedStruct1", size = 1, decl = simple-types.cpp:21, compiler_type = 0x{{[0-9a-f]+}} struct ReturnedStruct1 {
 // CHECK-DAG:  Type{{.*}} , name = "ReturnedStruct2", size = 1, decl = simple-types.cpp:22, compiler_type = 0x{{[0-9a-f]+}} struct ReturnedStruct2 {
 // CHECK-DAG:  Type{{.*}} , name = "MyStruct", size = 1, decl = simple-types.cpp:24, compiler_type = 0x{{[0-9a-f]+}} struct MyStruct {
diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp
index b06c6bf..b2d7937 100644
--- a/lldb/tools/debugserver/source/RNBRemote.cpp
+++ b/lldb/tools/debugserver/source/RNBRemote.cpp
@@ -170,6 +170,20 @@ static uint64_t decode_uint64(const char *p, int base, char **end = nullptr,
   return addr;
 }
 
+/// Attempts to parse a prefix of `number_str` as a uint64_t. If
+/// successful, the number is returned and the prefix is dropped from
+/// `number_str`.
+static std::optional<uint64_t> extract_u64(std::string_view &number_str) {
+  char *str_end = nullptr;
+  errno = 0;
+  uint64_t number = strtoull(number_str.data(), &str_end, 16);
+  if (errno != 0)
+    return std::nullopt;
+  assert(str_end);
+  number_str.remove_prefix(str_end - number_str.data());
+  return number;
+}
+
 static void append_hex_value(std::ostream &ostrm, const void *buf,
                              size_t buf_size, bool swap) {
   int i;
@@ -204,6 +218,25 @@ static void append_hexified_string(std::ostream &ostrm,
   }
 }
 
+/// Returns true if `str` starts with `prefix`.
+static bool starts_with(std::string_view str, std::string_view prefix) {
+  return str.substr(0, prefix.size()) == prefix;
+}
+
+/// Splits `list_str` into multiple string_views separated by `,`.
+static std::vector<std::string_view>
+parse_comma_separated_list(std::string_view list_str) {
+  std::vector<std::string_view> list;
+  while (!list_str.empty()) {
+    auto pos = list_str.find(',');
+    list.push_back(list_str.substr(0, pos));
+    if (pos == list_str.npos)
+      break;
+    list_str.remove_prefix(pos + 1);
+  }
+  return list;
+}
+
 // from System.framework/Versions/B/PrivateHeaders/sys/codesign.h
 extern "C" {
 #define CS_OPS_STATUS 0       /* return status */
@@ -270,6 +303,11 @@ void RNBRemote::CreatePacketTable() {
                      "Read memory"));
   t.push_back(Packet(read_register, &RNBRemote::HandlePacket_p, NULL, "p",
                      "Read one register"));
+  // Careful: this *must* come before the `M` packet, as debugserver matches
+  // packet prefixes against known packet names. Inverting the order would match
+  // `MultiMemRead` as an `M` packet.
+  t.push_back(Packet(multi_mem_read, &RNBRemote::HandlePacket_MultiMemRead,
+                     NULL, "MultiMemRead", "Read multiple memory addresses"));
   t.push_back(Packet(write_memory, &RNBRemote::HandlePacket_M, NULL, "M",
                      "Write memory"));
   t.push_back(Packet(write_register, &RNBRemote::HandlePacket_P, NULL, "P",
@@ -3150,6 +3188,88 @@ rnb_err_t RNBRemote::HandlePacket_m(const char *p) {
   return SendPacket(ostrm.str());
 }
 
+rnb_err_t RNBRemote::HandlePacket_MultiMemRead(const char *p) {
+  const std::string_view packet_name("MultiMemRead:");
+  std::string_view packet(p);
+
+  if (!starts_with(packet, packet_name))
+    return HandlePacket_ILLFORMED(__FILE__, __LINE__, p,
+                                  "Invalid MultiMemRead packet prefix");
+
+  packet.remove_prefix(packet_name.size());
+
+  const std::string_view ranges_prefix("ranges:");
+  if (!starts_with(packet, ranges_prefix))
+    return HandlePacket_ILLFORMED(__FILE__, __LINE__, packet.data(),
+                                  "Missing 'ranges' in MultiMemRead packet");
+  packet.remove_prefix(ranges_prefix.size());
+
+  std::vector<std::pair<nub_addr_t, std::size_t>> ranges;
+  std::size_t total_length = 0;
+
+  // Ranges should have the form: <addr>,<size>[,<addr>,<size>]*;
+  auto end_of_ranges_pos = packet.find(';');
+  if (end_of_ranges_pos == packet.npos)
+    return HandlePacket_ILLFORMED(__FILE__, __LINE__, packet.data(),
+                                  "MultiMemRead missing end of ranges marker");
+
+  std::vector<std::string_view> numbers_list =
+      parse_comma_separated_list(packet.substr(0, end_of_ranges_pos));
+  packet.remove_prefix(end_of_ranges_pos + 1);
+
+  // Ranges are pairs, so the number of elements must be even.
+  if (numbers_list.size() % 2 == 1)
+    return HandlePacket_ILLFORMED(
+        __FILE__, __LINE__, p,
+        "MultiMemRead has an odd number of numbers for the ranges");
+
+  for (unsigned idx = 0; idx < numbers_list.size(); idx += 2) {
+    std::optional<uint64_t> maybe_addr = extract_u64(numbers_list[idx]);
+    std::optional<uint64_t> maybe_length = extract_u64(numbers_list[idx + 1]);
+    if (!maybe_addr || !maybe_length)
+      return HandlePacket_ILLFORMED(__FILE__, __LINE__, packet.data(),
+                                    "Invalid MultiMemRead range");
+    // A sanity check that the packet requested is not too large or a negative
+    // number.
+    if (*maybe_length > 4 * 1024 * 1024)
+      return HandlePacket_ILLFORMED(__FILE__, __LINE__, packet.data(),
+                                    "MultiMemRead length is too large");
+
+    ranges.emplace_back(*maybe_addr, *maybe_length);
+    total_length += *maybe_length;
+  }
+
+  if (ranges.empty())
+    return HandlePacket_ILLFORMED(__FILE__, __LINE__, p,
+                                  "MultiMemRead has an empty range list");
+
+  if (!packet.empty())
+    return HandlePacket_ILLFORMED(
+        __FILE__, __LINE__, p, "MultiMemRead packet has unrecognized fields");
+
+  std::vector<std::vector<uint8_t>> buffers;
+  buffers.reserve(ranges.size());
+  for (auto [base_addr, length] : ranges) {
+    buffers.emplace_back(length, 0);
+    nub_size_t bytes_read = DNBProcessMemoryRead(m_ctx.ProcessID(), base_addr,
+                                                 length, buffers.back().data());
+    buffers.back().resize(bytes_read);
+  }
+
+  std::ostringstream reply_stream;
+  bool first = true;
+  for (const std::vector<uint8_t> &buffer : buffers) {
+    reply_stream << (first ? "" : ",") << std::hex << buffer.size();
+    first = false;
+  }
+  reply_stream << ';';
+
+  for (const std::vector<uint8_t> &buffer : buffers)
+    binary_encode_data_vector(reply_stream, buffer);
+
+  return SendPacket(reply_stream.str());
+}
+
 // Read memory, sent it up as binary data.
 // Usage:  xADDR,LEN
 // ADDR and LEN are both base 16.
@@ -3503,6 +3623,7 @@ rnb_err_t RNBRemote::HandlePacket_qSupported(const char *p) {
   if (supports_memory_tagging())
     reply << "memory-tagging+;";
 
+  reply << "MultiMemRead+;";
   return SendPacket(reply.str().c_str());
 }
 
diff --git a/lldb/tools/debugserver/source/RNBRemote.h b/lldb/tools/debugserver/source/RNBRemote.h
index cf1c978..b32c00a 100644
--- a/lldb/tools/debugserver/source/RNBRemote.h
+++ b/lldb/tools/debugserver/source/RNBRemote.h
@@ -136,6 +136,7 @@ public:
     query_transfer,                     // 'qXfer:'
     json_query_dyld_process_state,      // 'jGetDyldProcessState'
     enable_error_strings,               // 'QEnableErrorStrings'
+    multi_mem_read,                     // 'MultiMemRead'
     unknown_type
   };
   // clang-format on
@@ -216,6 +217,7 @@ public:
   rnb_err_t HandlePacket_last_signal(const char *p);
   rnb_err_t HandlePacket_m(const char *p);
   rnb_err_t HandlePacket_M(const char *p);
+  rnb_err_t HandlePacket_MultiMemRead(const char *p);
   rnb_err_t HandlePacket_x(const char *p);
   rnb_err_t HandlePacket_X(const char *p);
   rnb_err_t HandlePacket_z(const char *p);
diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp
index 7738913..e7d9b89 100644
--- a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp
@@ -57,7 +57,7 @@ SetupIORedirection(const std::vector<std::optional<std::string>> &stdio,
   size_t n = std::max(stdio.size(), static_cast<size_t>(3));
   for (size_t i = 0; i < n; i++) {
     std::optional<std::string> path;
-    if (stdio.size() < i)
+    if (stdio.size() <= i)
       path = stdio.back();
     else
       path = stdio[i];
@@ -107,7 +107,7 @@ RunInTerminal(DAP &dap, const protocol::LaunchRequestArguments &arguments) {
 
   llvm::json::Object reverse_request = CreateRunInTerminalReverseRequest(
       arguments.configuration.program, arguments.args, arguments.env,
-      arguments.cwd, comm_file.m_path, debugger_pid,
+      arguments.cwd, comm_file.m_path, debugger_pid, arguments.stdio,
       arguments.console == protocol::eConsoleExternalTerminal);
   dap.SendReverseRequest<LogFailureResponseHandler>("runInTerminal",
                                                     std::move(reverse_request));
diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp
index 4f26599..71e91f8 100644
--- a/lldb/tools/lldb-dap/JSONUtils.cpp
+++ b/lldb/tools/lldb-dap/JSONUtils.cpp
@@ -866,7 +866,8 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit) {
 llvm::json::Object CreateRunInTerminalReverseRequest(
     llvm::StringRef program, const std::vector<std::string> &args,
     const llvm::StringMap<std::string> &env, llvm::StringRef cwd,
-    llvm::StringRef comm_file, lldb::pid_t debugger_pid, bool external) {
+    llvm::StringRef comm_file, lldb::pid_t debugger_pid,
+    const std::vector<std::optional<std::string>> &stdio, bool external) {
   llvm::json::Object run_in_terminal_args;
   if (external) {
     // This indicates the IDE to open an external terminal window.
@@ -885,6 +886,18 @@ llvm::json::Object CreateRunInTerminalReverseRequest(
   }
   req_args.push_back("--launch-target");
   req_args.push_back(program.str());
+  if (!stdio.empty()) {
+    req_args.push_back("--stdio");
+    std::stringstream ss;
+    for (const std::optional<std::string> &file : stdio) {
+      if (file)
+        ss << *file;
+      ss << ":";
+    }
+    std::string files = ss.str();
+    files.pop_back();
+    req_args.push_back(std::move(files));
+  }
   req_args.insert(req_args.end(), args.begin(), args.end());
   run_in_terminal_args.try_emplace("args", req_args);
 
diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h
index e9094f6..0c865a3 100644
--- a/lldb/tools/lldb-dap/JSONUtils.h
+++ b/lldb/tools/lldb-dap/JSONUtils.h
@@ -388,6 +388,10 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit);
 ///     launcher uses it on Linux tell the kernel that it should allow the
 ///     debugger process to attach.
 ///
+/// \param[in] stdio
+///     An array of file paths for redirecting the program's standard IO
+///     streams.
+///
 /// \param[in] external
 ///     If set to true, the program will run in an external terminal window
 ///     instead of IDE's integrated terminal.
@@ -398,7 +402,8 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit &unit);
 llvm::json::Object CreateRunInTerminalReverseRequest(
     llvm::StringRef program, const std::vector<std::string> &args,
     const llvm::StringMap<std::string> &env, llvm::StringRef cwd,
-    llvm::StringRef comm_file, lldb::pid_t debugger_pid, bool external);
+    llvm::StringRef comm_file, lldb::pid_t debugger_pid,
+    const std::vector<std::optional<std::string>> &stdio, bool external);
 
 /// Create a "Terminated" JSON object that contains statistics
 ///
diff --git a/lldb/tools/lldb-dap/Options.td b/lldb/tools/lldb-dap/Options.td
index c8492c6..5e9dd7a 100644
--- a/lldb/tools/lldb-dap/Options.td
+++ b/lldb/tools/lldb-dap/Options.td
@@ -47,6 +47,12 @@ def debugger_pid: S<"debugger-pid">,
   HelpText<"The PID of the lldb-dap instance that sent the launchInTerminal "
     "request when using --launch-target.">;
 
+def stdio: S<"stdio">,
+  MetaVarName<"<stdin:stdout:stderr:...>">,
+  HelpText<"An array of file paths for redirecting the program's standard IO "
+    "streams. A colon-separated list of entries. Empty value means no "
+    "redirection.">;
+
 def repl_mode
     : S<"repl-mode">,
       MetaVarName<"<mode>">,
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
index 92dada2..a85a68b 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h
@@ -300,6 +300,7 @@ struct LaunchRequestArguments {
   /// terminal or external terminal.
   Console console = eConsoleInternal;
 
+  /// An array of file paths for redirecting the program's standard IO streams.
   std::vector<std::optional<std::string>> stdio;
 
   /// @}
diff --git a/lldb/tools/lldb-dap/ProtocolUtils.cpp b/lldb/tools/lldb-dap/ProtocolUtils.cpp
index 775c82f..868c67c 100644
--- a/lldb/tools/lldb-dap/ProtocolUtils.cpp
+++ b/lldb/tools/lldb-dap/ProtocolUtils.cpp
@@ -301,6 +301,14 @@ Variable CreateVariable(lldb::SBValue v, int64_t var_ref, bool format_hex,
   if (lldb::addr_t addr = v.GetLoadAddress(); addr != LLDB_INVALID_ADDRESS)
     var.memoryReference = addr;
 
+  bool is_readonly = v.GetType().IsAggregateType() ||
+                     v.GetValueType() == lldb::eValueTypeRegisterSet;
+  if (is_readonly) {
+    if (!var.presentationHint)
+      var.presentationHint = {VariablePresentationHint()};
+    var.presentationHint->attributes.push_back("readOnly");
+  }
+
   return var;
 }
 
diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json
index e961c2e..3f0f150 100644
--- a/lldb/tools/lldb-dap/package.json
+++ b/lldb/tools/lldb-dap/package.json
@@ -626,7 +626,10 @@
               "stdio": {
                 "type": "array",
                 "items": {
-                  "type": "string"
+                  "type": [
+                    "string",
+                    "null"
+                  ]
                 },
                 "description": "The stdio property specifies the redirection targets for the debuggee's stdio streams. A null value redirects a stream to the default debug terminal. String can be a path to file, named pipe or TTY device. If less than three values are provided, the list will be padded with the last value. Specifying more than three values will create additional file descriptors (4, 5, etc.).",
                 "default": []
diff --git a/lldb/tools/lldb-dap/tool/lldb-dap.cpp b/lldb/tools/lldb-dap/tool/lldb-dap.cpp
index 93446c0..e59cef9 100644
--- a/lldb/tools/lldb-dap/tool/lldb-dap.cpp
+++ b/lldb/tools/lldb-dap/tool/lldb-dap.cpp
@@ -16,6 +16,7 @@
 #include "lldb/API/SBStream.h"
 #include "lldb/Host/Config.h"
 #include "lldb/Host/File.h"
+#include "lldb/Host/FileSystem.h"
 #include "lldb/Host/MainLoop.h"
 #include "lldb/Host/MainLoopBase.h"
 #include "lldb/Host/MemoryMonitor.h"
@@ -24,7 +25,9 @@
 #include "lldb/Utility/UriParser.h"
 #include "lldb/lldb-forward.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Option/Arg.h"
@@ -42,8 +45,10 @@
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 #include <condition_variable>
+#include <cstddef>
 #include <cstdio>
 #include <cstdlib>
+#include <exception>
 #include <fcntl.h>
 #include <map>
 #include <memory>
@@ -143,6 +148,74 @@ static void PrintVersion() {
   llvm::outs() << "liblldb: " << lldb::SBDebugger::GetVersionString() << '\n';
 }
 
+#if not defined(_WIN32)
+struct FDGroup {
+  int GetFlags() const {
+    if (read && write)
+      return O_NOCTTY | O_CREAT | O_RDWR;
+    if (read)
+      return O_NOCTTY | O_RDONLY;
+    return O_NOCTTY | O_CREAT | O_WRONLY | O_TRUNC;
+  }
+
+  std::vector<int> fds;
+  bool read = false;
+  bool write = false;
+};
+
+static llvm::Error RedirectToFile(const FDGroup &fdg, llvm::StringRef file) {
+  if (!fdg.read && !fdg.write)
+    return llvm::Error::success();
+  int target_fd = lldb_private::FileSystem::Instance().Open(
+      file.str().c_str(), fdg.GetFlags(), 0666);
+  if (target_fd == -1)
+    return llvm::errorCodeToError(
+        std::error_code(errno, std::generic_category()));
+  for (int fd : fdg.fds) {
+    if (target_fd == fd)
+      continue;
+    if (::dup2(target_fd, fd) == -1)
+      return llvm::errorCodeToError(
+          std::error_code(errno, std::generic_category()));
+  }
+  ::close(target_fd);
+  return llvm::Error::success();
+}
+
+static llvm::Error
+SetupIORedirection(const llvm::SmallVectorImpl<llvm::StringRef> &files) {
+  llvm::SmallDenseMap<llvm::StringRef, FDGroup> groups;
+  for (size_t i = 0; i < files.size(); i++) {
+    if (files[i].empty())
+      continue;
+    auto group = groups.find(files[i]);
+    if (group == groups.end())
+      group = groups.insert({files[i], {{static_cast<int>(i)}}}).first;
+    else
+      group->second.fds.push_back(i);
+    switch (i) {
+    case 0:
+      group->second.read = true;
+      break;
+    case 1:
+    case 2:
+      group->second.write = true;
+      break;
+    default:
+      group->second.read = true;
+      group->second.write = true;
+      break;
+    }
+  }
+  for (const auto &[file, group] : groups) {
+    if (llvm::Error err = RedirectToFile(group, file))
+      return llvm::createStringError(
+          llvm::formatv("{0}: {1}", file, llvm::toString(std::move(err))));
+  }
+  return llvm::Error::success();
+}
+#endif
+
 // If --launch-target is provided, this instance of lldb-dap becomes a
 // runInTerminal launcher. It will ultimately launch the program specified in
 // the --launch-target argument, which is the original program the user wanted
@@ -165,6 +238,7 @@ static void PrintVersion() {
 static llvm::Error LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg,
                                              llvm::StringRef comm_file,
                                              lldb::pid_t debugger_pid,
+                                             llvm::StringRef stdio,
                                              char *argv[]) {
 #if defined(_WIN32)
   return llvm::createStringError(
@@ -179,6 +253,16 @@ static llvm::Error LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg,
     (void)prctl(PR_SET_PTRACER, debugger_pid, 0, 0, 0);
 #endif
 
+  lldb_private::FileSystem::Initialize();
+  if (!stdio.empty()) {
+    llvm::SmallVector<llvm::StringRef, 3> files;
+    stdio.split(files, ':');
+    while (files.size() < 3)
+      files.push_back(files.back());
+    if (llvm::Error err = SetupIORedirection(files))
+      return err;
+  }
+
   RunInTerminalLauncherCommChannel comm_channel(comm_file);
   if (llvm::Error err = comm_channel.NotifyPid())
     return err;
@@ -484,9 +568,10 @@ int main(int argc, char *argv[]) {
           break;
         }
       }
+      llvm::StringRef stdio = input_args.getLastArgValue(OPT_stdio);
       if (llvm::Error err =
               LaunchRunInTerminalTarget(*target_arg, comm_file->getValue(), pid,
-                                        argv + target_args_pos)) {
+                                        stdio, argv + target_args_pos)) {
         llvm::errs() << llvm::toString(std::move(err)) << '\n';
         return EXIT_FAILURE;
       }
diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
index f940229..012eae0 100644
--- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
+++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
@@ -639,3 +639,25 @@ TEST_F(GDBRemoteCommunicationClientTest, CalculateMD5) {
   EXPECT_EQ(expected_high, result->high());
 }
 #endif
+
+TEST_F(GDBRemoteCommunicationClientTest, MultiMemReadSupported) {
+  std::future<bool> async_result = std::async(std::launch::async, [&] {
+    StringExtractorGDBRemote qSupported_packet_request;
+    server.GetPacket(qSupported_packet_request);
+    server.SendPacket("MultiMemRead+;");
+    return true;
+  });
+  ASSERT_TRUE(client.GetMultiMemReadSupported());
+  async_result.wait();
+}
+
+TEST_F(GDBRemoteCommunicationClientTest, MultiMemReadNotSupported) {
+  std::future<bool> async_result = std::async(std::launch::async, [&] {
+    StringExtractorGDBRemote qSupported_packet_request;
+    server.GetPacket(qSupported_packet_request);
+    server.SendPacket(";");
+    return true;
+  });
+  ASSERT_FALSE(client.GetMultiMemReadSupported());
+  async_result.wait();
+}
diff --git a/lldb/unittests/Utility/XcodeSDKTest.cpp b/lldb/unittests/Utility/XcodeSDKTest.cpp
index de9f91a..a8a597b 100644
--- a/lldb/unittests/Utility/XcodeSDKTest.cpp
+++ b/lldb/unittests/Utility/XcodeSDKTest.cpp
@@ -27,6 +27,7 @@ TEST(XcodeSDKTest, ParseTest) {
   EXPECT_EQ(XcodeSDK("AppleTVOS.sdk").GetType(), XcodeSDK::AppleTVOS);
   EXPECT_EQ(XcodeSDK("WatchSimulator.sdk").GetType(), XcodeSDK::WatchSimulator);
   EXPECT_EQ(XcodeSDK("WatchOS.sdk").GetType(), XcodeSDK::watchOS);
+  EXPECT_EQ(XcodeSDK("BridgeOS.sdk").GetType(), XcodeSDK::BridgeOS);
   EXPECT_EQ(XcodeSDK("XRSimulator.sdk").GetType(), XcodeSDK::XRSimulator);
   EXPECT_EQ(XcodeSDK("XROS.sdk").GetType(), XcodeSDK::XROS);
   EXPECT_EQ(XcodeSDK("Linux.sdk").GetType(), XcodeSDK::Linux);
diff --git a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst
index ba670d3..f472b862 100644
--- a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst
+++ b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst
@@ -37,13 +37,13 @@ includes contributions to open source projects such as LLVM [:ref:`LLVM
 
 The LLVM compiler has upstream support for commercially available AMD GPU
 hardware (AMDGPU) [:ref:`AMDGPU-LLVM <amdgpu-dwarf-AMDGPU-LLVM>`]. The open
-source ROCgdb [:ref:`AMD-ROCgdb <amdgpu-dwarf-AMD-ROCgdb>`] GDB based debugger
+source ROCgdb [:ref:`AMD-ROCgdb <amdgpu-dwarf-AMD-ROCgdb>`] GDB-based debugger
 also has support for AMDGPU which is being upstreamed. Support for AMDGPU is
 also being added by third parties to the GCC [:ref:`GCC <amdgpu-dwarf-GCC>`]
 compiler and the Perforce TotalView HPC Debugger [:ref:`Perforce-TotalView
 <amdgpu-dwarf-Perforce-TotalView>`].
 
-To support debugging heterogeneous programs several features that are not
+To support debugging heterogeneous programs, several features that are not
 provided by current DWARF Version 5 [:ref:`DWARF <amdgpu-dwarf-DWARF>`] have
 been identified. The :ref:`amdgpu-dwarf-extensions` section gives an overview of
 the extensions devised to address the missing features. The extensions seek to
@@ -107,7 +107,7 @@ for each in terms of heterogeneous debugging.
 DWARF Version 5 does not allow location descriptions to be entries on the DWARF
 expression stack. They can only be the final result of the evaluation of a DWARF
 expression. However, by allowing a location description to be a first-class
-entry on the DWARF expression stack it becomes possible to compose expressions
+entry on the DWARF expression stack, it becomes possible to compose expressions
 containing both values and location descriptions naturally. It allows objects to
 be located in any kind of memory address space, in registers, be implicit
 values, be undefined, or a composite of any of these.
@@ -123,20 +123,20 @@ non-default address spaces and generalizing the power of composite location
 descriptions to any kind of location description.
 
 For those familiar with the definition of location descriptions in DWARF Version
-5, the definitions in these extensions are presented differently, but does in
+5, the definitions in these extensions are presented differently, but do in
 fact define the same concept with the same fundamental semantics. However, it
 does so in a way that allows the concept to extend to support address spaces,
 bit addressing, the ability for composite location descriptions to be composed
 of any kind of location description, and the ability to support objects located
 at multiple places. Collectively these changes expand the set of architectures
-that can be supported and improves support for optimized code.
+that can be supported and improve support for optimized code.
 
 Several approaches were considered, and the one presented, together with the
 extensions it enables, appears to be the simplest and cleanest one that offers
 the greatest improvement of DWARF's ability to support debugging optimized GPU
 and non-GPU code. Examining the GDB debugger and LLVM compiler, it appears only
 to require modest changes as they both already have to support general use of
-location descriptions. It is anticipated that will also be the case for other
+location descriptions. It is anticipated that this will also be the case for other
 debuggers and compilers.
 
 GDB has been modified to evaluate DWARF Version 5 expressions with location
@@ -156,7 +156,7 @@ DWARF Expression Stack* [:ref:`AMDGPU-DWARF-LOC
 2.2 Generalize CFI to Allow Any Location Description Kind
 ---------------------------------------------------------
 
-CFI describes restoring callee saved registers that are spilled. Currently CFI
+CFI describes restoring callee saved registers that are spilled. Currently, CFI
 only allows a location description that is a register, memory address, or
 implicit location description. AMDGPU optimized code may spill scalar registers
 into portions of vector registers. This requires extending CFI to allow any
@@ -223,7 +223,7 @@ infinite precision offsets to allow it to correctly track a series of positive
 and negative offsets that may transiently overflow or underflow, but end up in
 range. This is simple for the arithmetic operations as they are defined in terms
 of two's complement arithmetic on a base type of a fixed size. Therefore, the
-offset operation define that integer overflow is ill-formed. This is in contrast
+offset operation defines that integer overflow is ill-formed. This is in contrast
 to the ``DW_OP_plus``, ``DW_OP_plus_uconst``, and ``DW_OP_minus`` arithmetic
 operations which define that it causes wrap-around.
 
@@ -359,7 +359,7 @@ address space at a fixed address.
 
 The ``DW_OP_LLVM_form_aspace_address`` (see
 :ref:`amdgpu-dwarf-memory-location-description-operations`) operation is defined
-to create a memory location description from an address and address space. If
+to create a memory location description from an address and address space. It
 can be used to specify the location of a variable that is allocated in a
 specific address space. This allows the size of addresses in an address space to
 be larger than the generic type. It also allows a consumer great implementation
@@ -372,7 +372,7 @@ In contrast, if the ``DW_OP_LLVM_form_aspace_address`` operation had been
 defined to produce a value, and an implicit conversion to a memory location
 description was defined, then it would be limited to the size of the generic
 type (which matches the size of the default address space). An implementation
-would likely have to use *reserved ranges* of value to represent different
+would likely have to use *reserved ranges* of values to represent different
 address spaces. Such a value would likely not match any address value in the
 actual hardware. That would require the consumer to have special treatment for
 such values.
@@ -528,7 +528,7 @@ active. To describe the conceptual location of non-active lanes requires an
 attribute that has an expression that computes the source location PC for each
 lane.
 
-For efficiency, the expression calculates the source location the wavefront as a
+For efficiency, the expression calculates the source location of the wavefront as a
 whole. This can be done using the ``DW_OP_LLVM_select_bit_piece`` (see
 :ref:`amdgpu-dwarf-operation-to-create-vector-composite-location-descriptions`)
 operation.
@@ -564,7 +564,7 @@ information entry to indicate that there is additional target architecture
 specific information in the debugging information entries of that compilation
 unit. This allows a consumer to know what extensions are present in the debugger
 information entries as is possible with the augmentation string of other
-sections. See .
+sections.
 
 The format that should be used for an augmentation string is also recommended.
 This allows a consumer to parse the string when it contains information from
@@ -581,7 +581,7 @@ See :ref:`amdgpu-dwarf-full-and-partial-compilation-unit-entries`,
 
 AMDGPU supports programming languages that include online compilation where the
 source text may be created at runtime. For example, the OpenCL and HIP language
-runtimes support online compilation. To support is, a way to embed the source
+runtimes support online compilation. To support this, a way to embed the source
 text in the debug information is provided.
 
 See :ref:`amdgpu-dwarf-line-number-information`.
@@ -589,16 +589,16 @@ See :ref:`amdgpu-dwarf-line-number-information`.
 2.17 Allow MD5 Checksums to be Optionally Present
 -------------------------------------------------
 
-In DWARF Version 5 the file timestamp and file size can be optional, but if the
-MD5 checksum is present it must be valid for all files. This is a problem if
+In DWARF Version 5, the file timestamp and file size can be optional, but if the
+MD5 checksum is present, it must be valid for all files. This is a problem if
 using link time optimization to combine compilation units where some have MD5
-checksums and some do not. Therefore, sSupport to allow MD5 checksums to be
-optionally present in the line table is added.
+checksums, and others do not. Therefore, the line table is extended to allow MD5
+checksums to be optional.
 
 See :ref:`amdgpu-dwarf-line-number-information`.
 
-2.18 Add the HIP Programing Language
-------------------------------------
+2.18 Add the HIP Programming Language
+-------------------------------------
 
 The HIP programming language [:ref:`HIP <amdgpu-dwarf-HIP>`], which is supported
 by the AMDGPU, is added.
@@ -617,7 +617,7 @@ hardware to allow a single instruction to execute multiple iterations using
 vector registers.
 
 Note that although this is similar to SIMT execution, the way a client debugger
-uses the information is fundamentally different. In SIMT execution the debugger
+uses the information is fundamentally different. In SIMT execution, the debugger
 needs to present the concurrent execution as distinct source language threads
 that the user can list and switch focus between. With iteration concurrency
 optimizations, such as software pipelining and vectorized SIMD, the debugger
@@ -648,7 +648,7 @@ language loop iterations are executing concurrently. See
 It is common in SIMD vectorization for the compiler to generate code that
 promotes portions of an array into vector registers. For example, if the
 hardware has vector registers with 8 elements, and 8 wide SIMD instructions, the
-compiler may vectorize a loop so that is executes 8 iterations concurrently for
+compiler may vectorize a loop so that it executes 8 iterations concurrently for
 each vectorized loop iteration.
 
 On the first iteration of the generated vectorized loop, iterations 0 to 7 of
@@ -691,7 +691,7 @@ Inside the loop body, the machine code loads ``src[i]`` and ``dst[i]`` into
 registers, adds them, and stores the result back into ``dst[i]``.
 
 Considering the location of ``dst`` and ``src`` in the loop body, the elements
-``dst[i]`` and ``src[i]`` would be located in registers, all other elements are
+``dst[i]`` and ``src[i]`` would be located in registers; all other elements are
 located in memory. Let register ``R0`` contain the base address of ``dst``,
 register ``R1`` contain ``i``, and register ``R2`` contain the registerized
 ``dst[i]`` element. We can describe the location of ``dst`` as a memory location
@@ -722,7 +722,7 @@ with a register location overlaid at a runtime offset involving ``i``:
 ----------------------------------------------
 
 AMDGPU supports languages, such as OpenCL, that define source language memory
-spaces. Support is added to define language specific memory spaces so they can
+spaces. Support is added to define language-specific memory spaces so they can
 be used in a consistent way by consumers. See :ref:`amdgpu-dwarf-memory-spaces`.
 
 A new attribute ``DW_AT_LLVM_memory_space`` is added to support using memory
@@ -738,9 +738,9 @@ accommodates only 32 unique operations. In practice, the lack of a central
 registry and a desire for backwards compatibility means vendor extensions are
 never retired, even when standard versions are accepted into DWARF proper. This
 has produced a situation where the effective encoding space available for new
-vendor extensions is miniscule today.
+vendor extensions is minuscule today.
 
-To expand this encoding space a new DWARF operation ``DW_OP_LLVM_user`` is
+To expand this encoding space, a new DWARF operation ``DW_OP_LLVM_user`` is
 added which acts as a "prefix" for vendor extensions. It is followed by a
 ULEB128 encoded vendor extension opcode, which is then followed by the operands
 of the corresponding vendor extension operation.
@@ -776,7 +776,7 @@ A. Changes Relative to DWARF Version 5
   .. note::
 
     Notes are included to describe how the changes are to be applied to the
-    DWARF Version 5 standard. They also describe rational and issues that may
+    DWARF Version 5 standard. They also describe rationale and issues that may
     need further consideration.
 
 A.2 General Description
@@ -898,7 +898,7 @@ elements that can be specified are:
 
 *A current lane*
 
-  The 0 based SIMT lane identifier to be used in evaluating a user presented
+  The 0-based SIMT lane identifier to be used in evaluating a user presented
   expression. This applies to source languages that are implemented for a target
   architecture using a SIMT execution model. These implementations map source
   language threads of execution to lanes of the target architecture threads.
@@ -917,7 +917,7 @@ elements that can be specified are:
 
 *A current iteration*
 
-  The 0 based source language iteration instance to be used in evaluating a user
+  The 0-based source language iteration instance to be used in evaluating a user
   presented expression. This applies to target architectures that support
   optimizations that result in executing multiple source language loop iterations
   concurrently.
@@ -1845,7 +1845,7 @@ There are these special value operations currently defined:
       interpreted as a value of T. If a conversion is wanted it can be done
       explicitly using a ``DW_OP_convert`` operation.
 
-      GDB has a per register hook that allows a target specific conversion on a
+      GDB has a per register hook that allows a target-specific conversion on a
       register by register basis. It defaults to truncation of bigger registers.
       Removing use of the target hook does not cause any test failures in common
       architectures. If the compiler for a target architecture did want some
@@ -1855,7 +1855,7 @@ There are these special value operations currently defined:
       If T is a larger type than the register size, then the default GDB
       register hook reads bytes from the next register (or reads out of bounds
       for the last register!). Removing use of the target hook does not cause
-      any test failures in common architectures (except an illegal hand written
+      any test failures in common architectures (except an illegal hand-written
       assembly test). If a target architecture requires this behavior, these
       extensions allow a composite location description to be used to combine
       multiple registers.
@@ -2283,7 +2283,7 @@ bit offset equal to V scaled by 8 (the byte size).
   The implicit conversion could also be defined as target architecture specific.
   For example, GDB checks if V is an integral type. If it is not it gives an
   error. Otherwise, GDB zero-extends V to 64 bits. If the GDB target defines a
-  hook function, then it is called. The target specific hook function can modify
+  hook function, then it is called. The target-specific hook function can modify
   the 64-bit value, possibly sign extending based on the original value type.
   Finally, GDB treats the 64-bit value V as a memory location address.
 
diff --git a/llvm/docs/CMakeLists.txt b/llvm/docs/CMakeLists.txt
index b4522e3..fc37c6d 100644
--- a/llvm/docs/CMakeLists.txt
+++ b/llvm/docs/CMakeLists.txt
@@ -136,17 +136,23 @@ if( NOT uses_ocaml LESS 0 AND LLVM_ENABLE_OCAMLDOC )
     list(APPEND odoc_files -load ${odoc_file})
   endforeach()
 
-  add_custom_target(ocaml_doc
-    COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
-    COMMAND ${OCAMLFIND} ocamldoc -d ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
-                                  -sort -colorize-code -html ${odoc_files}
-    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/_ocamldoc/style.css
-                                     ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html)
+  set(OCAML_DOC_ADD_TO_ALL "")
+  if(LLVM_BUILD_DOCS)
+    set(OCAML_DOC_ADD_TO_ALL ALL)
+  endif()
+
+  add_custom_target(ocaml_doc ${OCAML_DOC_ADD_TO_ALL}
+      COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
+      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
+      COMMAND ${OCAMLFIND} ocamldoc -d ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
+                                    -sort -colorize-code -html ${odoc_files}
+      COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/_ocamldoc/style.css
+                                       ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html)
 
   add_dependencies(ocaml_doc ${doc_targets})
 
-  if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
+
+  if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY AND LLVM_BUILD_DOCS)
     # ./ suffix is needed to copy the contents of html directory without
     # appending html/ into LLVM_INSTALL_OCAMLDOC_HTML_DIR.
     install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html/.
diff --git a/llvm/docs/CallGraphSection.md b/llvm/docs/CallGraphSection.md
index 8b18727..84d6061 100644
--- a/llvm/docs/CallGraphSection.md
+++ b/llvm/docs/CallGraphSection.md
@@ -1,10 +1,10 @@
-# .callgraph Section Layout
+# .llvm.callgraph Section Layout
 
-The `.callgraph` section is used to store call graph information for each function. The section contains a series of records, with each record corresponding to a single function.
+The `.llvm.callgraph` section is used to store call graph information for each function. The section contains a series of records, with each record corresponding to a single function.
 
 ## Per Function Record Layout
 
-Each record in the `.callgraph` section has the following binary layout:
+Each record in the `.llvm.callgraph` section has the following binary layout:
 
 | Field                                  | Type          | Size (bits) | Description                                                                                             |
 | -------------------------------------- | ------------- | ----------- | ------------------------------------------------------------------------------------------------------- |
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index 91dcd5c8..f253e02f 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -746,3 +746,92 @@ Examples:
        @llvm.dx.resource.load.cbufferrow.8(
            target("dx.CBuffer", target("dx.Layout", {i16}, 2, 0)) %buffer,
            i32 %index)
+
+Resource dimensions
+-------------------
+
+*relevant types: Textures and Buffer*
+
+The `getDimensions`_ DXIL operation returns the dimensions of a texture or
+buffer resource. It returns a `Dimensions`_ type, which is a struct
+containing four ``i32`` values. The values in the struct represent the size
+of each dimension of the resource, and when aplicable the number of array
+elements or number of samples. The mapping is defined in the
+`getDimensions`_ documentation.
+
+The LLVM IR representation of this operation has several forms
+depending on the resource type and the specific ``getDimensions`` query.
+The intrinsics return a scalar or anonymous struct with up to 4 `i32`
+elements. The intrinsic names include suffixes to indicate the number of
+elements in the return value. The suffix `.x` indicates a single `i32`
+return value, `.xy` indicates a struct with two `i32` values, and `.xyz`
+indicates a struct with three `i32` values.
+
+Intrinsics representing queries on multisampled texture resources include
+`.ms.` in their name and their return value includes an additional `i32` for
+the number of samples.
+
+Intrinsics with `mip_level` argument and `.levels.` in their name are used
+for texture resources with multiple MIP levels. Their return
+struct includes an additional `i32` for the number of levels the resource has.
+
+.. code-block:: llvm
+
+   i32 @llvm.dx.resource.getdimensions.x( target("dx.*") handle )
+   {i32, i32} @llvm.dx.resource.getdimensions.xy( target("dx.*") handle )
+   {i32, i32, i32} @llvm.dx.resource.getdimensions.xyz( target("dx.*") handle )
+   {i32, i32} @llvm.dx.resource.getdimensions.levels.x( target("dx.*") handle, i32 mip_level )
+   {i32, i32, i32} @llvm.dx.resource.getdimensions.levels.xy( target("dx.*") handle, i32 mip_level )
+   {i32, i32, i32, i32} @llvm.dx.resource.getdimensions.levels.xyz( target("dx.*") handle, i32 mip_level )
+   {i32, i32, i32} @llvm.dx.resource.getdimensions.ms.xy( target("dx.*") handle )
+   {i32, i32, i32, i32} @llvm.dx.resource.getdimensions.ms.xyz( target("dx.*") handle )
+
+.. list-table:: ``@llvm.dx.resource.getdimensions.*``
+   :header-rows: 1
+
+   * - Argument
+     -
+     - Type
+     - Description
+   * - Return value
+     -
+     - `i32`, `{i32, i32}`, `{i32, i32, i32}`, or `{i32, i32, i32, i32}`
+     - Width, height, and depth of the resource (based on the specific suffix), and a number of levels or samples where aplicable.
+   * - ``%handle``
+     - 0
+     - ``target(dx.*)``
+     - Resource handle
+   * - ``%mip_level``
+     - 1
+     - ``i32``
+     - MIP level for the requested dimensions.
+
+Examples:
+
+.. code-block:: llvm
+
+  ; RWBuffer<float4>
+  %dim = call i32 @llvm.dx.resource.getdimensions.x(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %handle)
+
+  ; Texture2D
+  %0 = call {i32, i32} @llvm.dx.resource.getdimensions.xy(target("dx.Texture", ...) %tex2d)
+  %tex2d_width = extractvalue {i32, i32} %0, 0
+  %tex2d_height = extractvalue {i32, i32} %0, 1
+
+  ; Texture2DArray with levels
+  %1 = call {i32, i32, i32, i32} @llvm.dx.resource.getdimensions.levels.xyz(
+     target("dx.Texture", ...) %tex2darray, i32 1)
+  %tex2darray_width = extractvalue {i32, i32, i32, i32} %1, 0
+  %tex2darray_height = extractvalue {i32, i32, i32, i32} %1, 1
+  %tex2darray_elem_count = extractvalue {i32, i32, i32, i32} %1, 2
+  %tex2darray_levels_count = extractvalue {i32, i32, i32, i32} %1, 3
+
+  ; Texture2DMS
+  %2 = call {i32, i32, i32} @llvm.dx.resource.getdimensions.ms.xy(
+     target("dx.Texture", ...) %tex2dms)
+  %tex2dms_width = extractvalue {i32, i32, i32} %2, 0
+  %tex2dms_height = extractvalue {i32, i32, i32} %2, 1
+  %tex2dms_samples_count = extractvalue {i32, i32, i32} %2, 2
+
+.. _Dimensions: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#resource-operation-return-types
+.. _getDimensions: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#getdimensions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 4884e2d..0c54f57 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7517,12 +7517,12 @@ sections that the user does not want removed after linking.
 '``unpredictable``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-``unpredictable`` metadata may be attached to any branch or switch
-instruction. It can be used to express the unpredictability of control
-flow. Similar to the ``llvm.expect`` intrinsic, it may be used to alter
-optimizations related to compare and branch instructions. The metadata
-is treated as a boolean value; if it exists, it signals that the branch
-or switch that it is attached to is completely unpredictable.
+``unpredictable`` metadata may be attached to any branch, select, or switch
+instruction. It can be used to express the unpredictability of control flow.
+Similar to the ``llvm.expect`` intrinsic, it may be used to alter optimizations
+related to compare and branch instructions. The metadata is treated as a
+boolean value; if it exists, it signals that the branch, select, or switch that
+it is attached to is completely unpredictable.
 
 .. _md_dereferenceable:
 
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 640516a..c352cd6 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -139,6 +139,8 @@ Changes to the Windows Target
 Changes to the X86 Backend
 --------------------------
 
+* `-mcpu=wildcatlake` is now supported.
+
 Changes to the OCaml bindings
 -----------------------------
 
@@ -174,6 +176,9 @@ Changes to LLDB
 
 * LLDB can now set breakpoints, show backtraces, and display variables when
   debugging Wasm with supported runtimes (WAMR and V8).
+* LLDB no longer stops processes by default when receiving SIGWINCH signals 
+  (window resize events) on Linux. This is the default on other Unix platforms.
+  You can re-enable it using `process handle --notify=true --stop=true SIGWINCH`.
 * The `show-progress` setting, which became a NOOP with the introduction of the
   statusline, now defaults to off and controls using OSC escape codes to show a
   native progress bar in supporting terminals like Ghostty and ConEmu.
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index 1bb07e39..26d5682 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -14,6 +14,7 @@
 #define LLVM_ADT_STRINGSWITCH_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstring>
@@ -38,7 +39,7 @@ namespace llvm {
 ///   .Case("green", Green)
 ///   .Case("blue", Blue)
 ///   .Case("indigo", Indigo)
-///   .Cases("violet", "purple", Violet)
+///   .Cases({"violet", "purple"}, Violet)
 ///   .Default(UnknownColor);
 /// \endcode
 template<typename T, typename R = T>
@@ -65,7 +66,7 @@ public:
 
   // Case-sensitive case matchers
   StringSwitch &Case(StringLiteral S, T Value) {
-    CaseImpl(Value, S);
+    CaseImpl(S, Value);
     return *this;
   }
 
@@ -85,63 +86,68 @@ public:
 
   StringSwitch &Cases(std::initializer_list<StringLiteral> CaseStrings,
                       T Value) {
-    return CasesImpl(Value, CaseStrings);
+    return CasesImpl(CaseStrings, Value);
   }
 
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, T Value) {
-    return CasesImpl(Value, {S0, S1});
+    return CasesImpl({S0, S1}, Value);
   }
 
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       T Value) {
-    return CasesImpl(Value, {S0, S1, S2});
+    return CasesImpl({S0, S1, S2}, Value);
   }
 
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3});
+    return CasesImpl({S0, S1, S2, S3}, Value);
   }
 
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3, S4});
+    return CasesImpl({S0, S1, S2, S3, S4}, Value);
   }
 
+  [[deprecated("Pass cases in std::initializer_list instead")]]
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3, S4, S5});
+    return CasesImpl({S0, S1, S2, S3, S4, S5}, Value);
   }
 
+  [[deprecated("Pass cases in std::initializer_list instead")]]
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3, S4, S5, S6});
+    return CasesImpl({S0, S1, S2, S3, S4, S5, S6}, Value);
   }
 
+  [[deprecated("Pass cases in std::initializer_list instead")]]
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, StringLiteral S7, T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3, S4, S5, S6, S7});
+    return CasesImpl({S0, S1, S2, S3, S4, S5, S6, S7}, Value);
   }
 
+  [[deprecated("Pass cases in std::initializer_list instead")]]
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, StringLiteral S7, StringLiteral S8,
                       T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3, S4, S5, S6, S7, S8});
+    return CasesImpl({S0, S1, S2, S3, S4, S5, S6, S7, S8}, Value);
   }
 
+  [[deprecated("Pass cases in std::initializer_list instead")]]
   StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                       StringLiteral S3, StringLiteral S4, StringLiteral S5,
                       StringLiteral S6, StringLiteral S7, StringLiteral S8,
                       StringLiteral S9, T Value) {
-    return CasesImpl(Value, {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9});
+    return CasesImpl({S0, S1, S2, S3, S4, S5, S6, S7, S8, S9}, Value);
   }
 
   // Case-insensitive case matchers.
   StringSwitch &CaseLower(StringLiteral S, T Value) {
-    CaseLowerImpl(Value, S);
+    CaseLowerImpl(S, Value);
     return *this;
   }
 
@@ -161,26 +167,26 @@ public:
 
   StringSwitch &CasesLower(std::initializer_list<StringLiteral> CaseStrings,
                            T Value) {
-    return CasesLowerImpl(Value, CaseStrings);
+    return CasesLowerImpl(CaseStrings, Value);
   }
 
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, T Value) {
-    return CasesLowerImpl(Value, {S0, S1});
+    return CasesLowerImpl({S0, S1}, Value);
   }
 
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                            T Value) {
-    return CasesLowerImpl(Value, {S0, S1, S2});
+    return CasesLowerImpl({S0, S1, S2}, Value);
   }
 
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                            StringLiteral S3, T Value) {
-    return CasesLowerImpl(Value, {S0, S1, S2, S3});
+    return CasesLowerImpl({S0, S1, S2, S3}, Value);
   }
 
   StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
                            StringLiteral S3, StringLiteral S4, T Value) {
-    return CasesLowerImpl(Value, {S0, S1, S2, S3, S4});
+    return CasesLowerImpl({S0, S1, S2, S3, S4}, Value);
   }
 
   [[nodiscard]] R Default(T Value) {
@@ -201,7 +207,7 @@ public:
 
 private:
   // Returns true when `Str` matches the `S` argument, and stores the result.
-  bool CaseImpl(T &Value, StringLiteral S) {
+  bool CaseImpl(StringLiteral S, T &Value) {
     if (!Result && Str == S) {
       Result = std::move(Value);
       return true;
@@ -211,7 +217,7 @@ private:
 
   // Returns true when `Str` matches the `S` argument (case-insensitive), and
   // stores the result.
-  bool CaseLowerImpl(T &Value, StringLiteral S) {
+  bool CaseLowerImpl(StringLiteral S, T &Value) {
     if (!Result && Str.equals_insensitive(S)) {
       Result = std::move(Value);
       return true;
@@ -219,20 +225,20 @@ private:
     return false;
   }
 
-  StringSwitch &CasesImpl(T &Value,
-                          std::initializer_list<StringLiteral> Cases) {
+  StringSwitch &CasesImpl(std::initializer_list<StringLiteral> Cases,
+                          T &Value) {
     // Stop matching after the string is found.
     for (StringLiteral S : Cases)
-      if (CaseImpl(Value, S))
+      if (CaseImpl(S, Value))
         break;
     return *this;
   }
 
-  StringSwitch &CasesLowerImpl(T &Value,
-                               std::initializer_list<StringLiteral> Cases) {
+  StringSwitch &CasesLowerImpl(std::initializer_list<StringLiteral> Cases,
+                               T &Value) {
     // Stop matching after the string is found.
     for (StringLiteral S : Cases)
-      if (CaseLowerImpl(Value, S))
+      if (CaseLowerImpl(S, Value))
         break;
     return *this;
   }
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 6bc51fe..5ad6288 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -575,7 +575,7 @@ public:
   /// cached embeddings should be invalidated to ensure
   /// correctness/recomputation. This is a no-op for SymbolicEmbedder but
   /// removes all the cached entries in FlowAwareEmbedder.
-  virtual void invalidateEmbeddings() { return; }
+  virtual void invalidateEmbeddings() {}
 };
 
 /// Class for computing the Symbolic embeddings of IR2Vec.
diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
index bb7f3be..ac03137 100644
--- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -60,11 +60,18 @@ public:
   LLVM_ABI StaticDataHotness getConstantHotnessUsingProfileCount(
       const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const;
 
+  /// Return the hotness based on section prefix \p SectionPrefix.
+  LLVM_ABI StaticDataHotness getSectionHotnessUsingDataAccessProfile(
+      std::optional<StringRef> SectionPrefix) const;
+
   /// Return the string representation of the hotness enum \p Hotness.
   LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const;
 
+  bool EnableDataAccessProf = false;
+
 public:
-  StaticDataProfileInfo() = default;
+  StaticDataProfileInfo(bool EnableDataAccessProf)
+      : EnableDataAccessProf(EnableDataAccessProf) {}
 
   /// If \p Count is not nullopt, add it to the profile count of the constant \p
   /// C in a saturating way, and clamp the count to \p getInstrMaxCountValue if
@@ -73,14 +80,10 @@ public:
   LLVM_ABI void addConstantProfileCount(const Constant *C,
                                         std::optional<uint64_t> Count);
 
-  /// Return a section prefix for the constant \p C based on its profile count.
-  /// - If a constant doesn't have a counter, return an empty string.
-  /// - Otherwise,
-  ///   - If it has a hot count, return "hot".
-  ///   - If it is seen by unprofiled function, return an empty string.
-  ///   - If it has a cold count, return "unlikely".
-  ///   - Otherwise (e.g. it's used by lukewarm functions), return an empty
-  ///     string.
+  /// Given a constant \p C, returns a section prefix.
+  /// If \p C is a global variable, the section prefix is the bigger one
+  /// between its existing section prefix and its use profile count. Otherwise,
+  /// the section prefix is based on its use profile count.
   LLVM_ABI StringRef getConstantSectionPrefix(
       const Constant *C, const ProfileSummaryInfo *PSI) const;
 };
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 26963ed..3f39b47 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -373,12 +373,10 @@ public:
   /// Disables all builtins.
   ///
   /// This can be used for options like -fno-builtin.
-  void disableAllFunctions() LLVM_ATTRIBUTE_UNUSED {
-    OverrideAsUnavailable.set();
-  }
+  [[maybe_unused]] void disableAllFunctions() { OverrideAsUnavailable.set(); }
 
   /// Forces a function to be marked as unavailable.
-  void setUnavailable(LibFunc F) LLVM_ATTRIBUTE_UNUSED {
+  [[maybe_unused]] void setUnavailable(LibFunc F) {
     assert(F < OverrideAsUnavailable.size() && "out-of-bounds LibFunc");
     OverrideAsUnavailable.set(F);
   }
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 19ca444..9ace255 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -16,6 +16,7 @@
 #define LLVM_CODEGEN_ASMPRINTER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -87,6 +88,10 @@ namespace remarks {
 class RemarkStreamer;
 }
 
+namespace vfs {
+class FileSystem;
+}
+
 /// This class is intended to be used as a driving class for all asm writers.
 class LLVM_ABI AsmPrinter : public MachineFunctionPass {
 public:
@@ -105,6 +110,9 @@ public:
   /// generating (such as the current section etc).
   std::unique_ptr<MCStreamer> OutStreamer;
 
+  /// The VFS to resolve asm include directives.
+  IntrusiveRefCntPtr<vfs::FileSystem> VFS;
+
   /// The current machine function.
   MachineFunction *MF = nullptr;
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 9855444..51318c9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -383,7 +383,8 @@ LLVM_ABI LegalizeMutation changeElementCountTo(unsigned TypeIdx,
 
 /// Keep the same scalar or element type as \p TypeIdx, but take the number of
 /// elements from \p Ty.
-LLVM_ABI LegalizeMutation changeElementCountTo(unsigned TypeIdx, LLT Ty);
+LLVM_ABI LegalizeMutation changeElementCountTo(unsigned TypeIdx,
+                                               ElementCount EC);
 
 /// Change the scalar size or element size to have the same scalar size as type
 /// index \p FromIndex. Unlike changeElementTo, this discards pointer types and
diff --git a/llvm/include/llvm/CodeGen/LiveRangeCalc.h b/llvm/include/llvm/CodeGen/LiveRangeCalc.h
index e9b62fb..67f5b69 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeCalc.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeCalc.h
@@ -259,7 +259,7 @@ public:
   /// jointly dominated by the blocks corresponding to the slot indices
   /// in @p Defs. This function is mainly for use in self-verification
   /// checks.
-  LLVM_ABI LLVM_ATTRIBUTE_UNUSED static bool
+  [[maybe_unused]] LLVM_ABI static bool
   isJointlyDominated(const MachineBasicBlock *MBB, ArrayRef<SlotIndex> Defs,
                      const SlotIndexes &Indexes);
 };
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 1169116..69713d0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1950,7 +1950,7 @@ LLVM_ABI bool isOnesOrOnesSplat(SDValue N, bool AllowUndefs = false);
 
 /// Return true if the value is a constant 0 integer or a splatted vector of a
 /// constant 0 integer (with no undefs).
-/// Does not permit build vector implicit truncation.
+/// Build vector implicit truncation is allowed.
 LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs = false);
 
 /// Return true if \p V is either a integer or FP constant.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
index f9070af..eb71e9a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
@@ -32,8 +32,9 @@ public:
   struct SymbolAddrs {
     ExecutorAddr Allocator;
     ExecutorAddr Reserve;
-    ExecutorAddr Finalize;
-    ExecutorAddr Deallocate;
+    ExecutorAddr Initialize;
+    ExecutorAddr Deinitialize;
+    ExecutorAddr Release;
   };
 
   /// Create an EPCGenericJITLinkMemoryManager instance from a given set of
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
index faec25d..fa48480 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
@@ -31,8 +31,8 @@ public:
   struct SymbolAddrs {
     ExecutorAddr Instance;
     ExecutorAddr Reserve;
-    ExecutorAddr Finalize;
-    ExecutorAddr Deallocate;
+    ExecutorAddr Initialize;
+    ExecutorAddr Release;
     ExecutorAddr RegisterEHFrame;
     ExecutorAddr DeregisterEHFrame;
   };
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
index 99ba456..d68a689 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
@@ -29,8 +29,9 @@ LLVM_ABI extern const char *SimpleExecutorDylibManagerResolveWrapperName;
 
 LLVM_ABI extern const char *SimpleExecutorMemoryManagerInstanceName;
 LLVM_ABI extern const char *SimpleExecutorMemoryManagerReserveWrapperName;
-LLVM_ABI extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName;
-LLVM_ABI extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName;
+LLVM_ABI extern const char *SimpleExecutorMemoryManagerInitializeWrapperName;
+LLVM_ABI extern const char *SimpleExecutorMemoryManagerDeinitializeWrapperName;
+LLVM_ABI extern const char *SimpleExecutorMemoryManagerReleaseWrapperName;
 
 LLVM_ABI extern const char *ExecutorSharedMemoryMapperServiceInstanceName;
 LLVM_ABI extern const char *ExecutorSharedMemoryMapperServiceReserveWrapperName;
@@ -73,9 +74,12 @@ using SPSSimpleExecutorDylibManagerResolveSignature = shared::SPSExpected<
 using SPSSimpleExecutorMemoryManagerReserveSignature =
     shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr,
                                                  uint64_t);
-using SPSSimpleExecutorMemoryManagerFinalizeSignature =
-    shared::SPSError(shared::SPSExecutorAddr, shared::SPSFinalizeRequest);
-using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError(
+using SPSSimpleExecutorMemoryManagerInitializeSignature =
+    shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr,
+                                                 shared::SPSFinalizeRequest);
+using SPSSimpleExecutorMemoryManagerDeinitializeSignature = shared::SPSError(
+    shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
+using SPSSimpleExecutorMemoryManagerReleaseSignature = shared::SPSError(
     shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
 
 // ExecutorSharedMemoryMapperService
@@ -93,6 +97,18 @@ using SPSExecutorSharedMemoryMapperServiceDeinitializeSignature =
 using SPSExecutorSharedMemoryMapperServiceReleaseSignature = shared::SPSError(
     shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
 
+// SimpleNativeMemoryMap APIs.
+using SPSSimpleRemoteMemoryMapReserveSignature =
+    shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr,
+                                                 uint64_t);
+using SPSSimpleRemoteMemoryMapInitializeSignature =
+    shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr,
+                                                 shared::SPSFinalizeRequest);
+using SPSSimpleRemoteMemoryMapDeinitializeSignature = shared::SPSError(
+    shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
+using SPSSimpleRemoteMemoryMapReleaseSignature = shared::SPSError(
+    shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
+
 using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr,
                                       shared::SPSSequence<shared::SPSString>);
 using SPSRunAsVoidFunctionSignature = int32_t(shared::SPSExecutorAddr);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
index 741f203..6224e92 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
@@ -34,34 +34,65 @@ class LLVM_ABI SimpleExecutorMemoryManager : public ExecutorBootstrapService {
 public:
   virtual ~SimpleExecutorMemoryManager();
 
-  Expected<ExecutorAddr> allocate(uint64_t Size);
-  Error finalize(tpctypes::FinalizeRequest &FR);
-  Error deallocate(const std::vector<ExecutorAddr> &Bases);
+  Expected<ExecutorAddr> reserve(uint64_t Size);
+  Expected<ExecutorAddr> initialize(tpctypes::FinalizeRequest &FR);
+  Error deinitialize(const std::vector<ExecutorAddr> &InitKeys);
+  Error release(const std::vector<ExecutorAddr> &Bases);
 
   Error shutdown() override;
   void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override;
 
 private:
-  struct Allocation {
+  struct RegionInfo {
     size_t Size = 0;
-    std::vector<shared::WrapperFunctionCall> DeallocationActions;
+    std::vector<shared::WrapperFunctionCall> DeallocActions;
   };
 
-  using AllocationsMap = DenseMap<void *, Allocation>;
+  struct SlabInfo {
+    using RegionMap = std::map<ExecutorAddr, RegionInfo>;
+    size_t Size = 0;
+    RegionMap Regions;
+  };
+
+  using SlabMap = std::map<void *, SlabInfo>;
+
+  /// Get a reference to the slab information for the slab containing the given
+  /// address.
+  Expected<SlabInfo &> getSlabInfo(ExecutorAddr A, StringRef Context);
+
+  /// Get a reference to the slab information for the slab *covering* the given
+  /// range. The given range must be a subrange of e(possibly equal to) the
+  /// range of the slab itself.
+  Expected<SlabInfo &> getSlabInfo(ExecutorAddrRange R, StringRef Context);
 
-  Error deallocateImpl(void *Base, Allocation &A);
+  /// Create a RegionInfo for the given range, which must not overlap any
+  /// existing region.
+  Expected<RegionInfo &> createRegionInfo(ExecutorAddrRange R,
+                                          StringRef Context);
+
+  /// Get a reference to the region information for the given address. This
+  /// address must represent the start of an existing initialized region.
+  Expected<RegionInfo &> getRegionInfo(SlabInfo &Slab, ExecutorAddr A,
+                                       StringRef Context);
+
+  /// Get a reference to the region information for the given address. This
+  /// address must represent the start of an existing initialized region.
+  Expected<RegionInfo &> getRegionInfo(ExecutorAddr A, StringRef Context);
 
   static llvm::orc::shared::CWrapperFunctionResult
   reserveWrapper(const char *ArgData, size_t ArgSize);
 
   static llvm::orc::shared::CWrapperFunctionResult
-  finalizeWrapper(const char *ArgData, size_t ArgSize);
+  initializeWrapper(const char *ArgData, size_t ArgSize);
+
+  static llvm::orc::shared::CWrapperFunctionResult
+  deinitializeWrapper(const char *ArgData, size_t ArgSize);
 
   static llvm::orc::shared::CWrapperFunctionResult
-  deallocateWrapper(const char *ArgData, size_t ArgSize);
+  releaseWrapper(const char *ArgData, size_t ArgSize);
 
   std::mutex M;
-  AllocationsMap Allocations;
+  SlabMap Slabs;
 };
 
 } // end namespace rt_bootstrap
diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h
index 7c7e988..96d3b2f 100644
--- a/llvm/include/llvm/IR/CFG.h
+++ b/llvm/include/llvm/IR/CFG.h
@@ -42,9 +42,9 @@ template <class Ptr, class USE_iterator> // Predecessor Iterator
 class PredIterator {
 public:
   using iterator_category = std::forward_iterator_tag;
-  using value_type = Ptr;
+  using value_type = Ptr *;
   using difference_type = std::ptrdiff_t;
-  using pointer = Ptr *;
+  using pointer = Ptr **;
   using reference = Ptr *;
 
 protected:
@@ -141,7 +141,8 @@ class SuccIterator
                                   std::random_access_iterator_tag, BlockT, int,
                                   BlockT *, BlockT *> {
 public:
-  using difference_type = int;
+  using value_type = BlockT *;
+  using difference_type = std::ptrdiff_t;
   using pointer = BlockT *;
   using reference = BlockT *;
 
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 570d6bc..3b7077c 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -77,6 +77,9 @@ def int_dx_resource_updatecounter
     : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
                             [IntrInaccessibleMemOrArgMemOnly]>;
 
+def int_dx_resource_getdimensions_x
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty], [IntrReadMem]>;
+
 // Cast between target extension handle types and dxil-style opaque handles
 def int_dx_resource_casthandle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
 
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 66e24fa..49a182be 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -167,6 +167,9 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
       : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
                               [IntrInaccessibleMemOrArgMemOnly]>;
 
+  def int_spv_resource_getdimensions_x
+      : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty], [IntrReadMem]>;
+
   def int_spv_resource_getpointer
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
                               [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 6183a7e..a8b647c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -405,17 +405,19 @@ def MIPS16_RET_DF : RuntimeLibcall;
 def MIPS16_RET_SC : RuntimeLibcall;
 def MIPS16_RET_SF : RuntimeLibcall;
 
-multiclass LibmLongDoubleLibCall<string libcall_basename = !toupper(NAME),
-                                 string rtbasename = NAME> {
+multiclass LibmLongDoubleLibCall<string libcall_basename = !toupper(!substr(NAME, 0, !sub(!size(NAME), 1))),
+                                 string rtname = NAME> {
+
+
   def NAME#"_f128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F128"),
-                           !strconcat(rtbasename, "l")>;
+                           rtname>;
   def NAME#"_ppcf128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_PPCF128"),
-                           !strconcat(rtbasename, "l")>;
+                           rtname>;
   def NAME#"_f80"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F80"),
-                           !strconcat(rtbasename, "l")>;
+                           rtname>;
 }
 
 // AArch64 calls
@@ -765,19 +767,19 @@ def fmodl_ppc128 : RuntimeLibcallImpl<REM_PPCF128, "fmodl">;
 
 def fmaf : RuntimeLibcallImpl<FMA_F32>;
 def fma : RuntimeLibcallImpl<FMA_F64>;
-defm fma : LibmLongDoubleLibCall;
+defm fmal : LibmLongDoubleLibCall;
 
 def sqrtf : RuntimeLibcallImpl<SQRT_F32>;
 def sqrt : RuntimeLibcallImpl<SQRT_F64>;
-defm sqrt : LibmLongDoubleLibCall;
+defm sqrtl : LibmLongDoubleLibCall;
 
 def cbrtf : RuntimeLibcallImpl<CBRT_F32>;
 def cbrt : RuntimeLibcallImpl<CBRT_F64>;
-defm cbrt : LibmLongDoubleLibCall;
+defm cbrtl : LibmLongDoubleLibCall;
 
 def logf : RuntimeLibcallImpl<LOG_F32>;
 def log : RuntimeLibcallImpl<LOG_F64>;
-defm log : LibmLongDoubleLibCall;
+defm logl : LibmLongDoubleLibCall;
 
 def __logf_finite : RuntimeLibcallImpl<LOG_FINITE_F32>;
 def __log_finite : RuntimeLibcallImpl<LOG_FINITE_F64>;
@@ -787,7 +789,7 @@ def __logl_finite_ppcf128 : RuntimeLibcallImpl<LOG_FINITE_PPCF128, "__logl_finit
 
 def log2f : RuntimeLibcallImpl<LOG2_F32>;
 def log2 : RuntimeLibcallImpl<LOG2_F64>;
-defm log2 : LibmLongDoubleLibCall;
+defm log2l : LibmLongDoubleLibCall;
 
 def __log2f_finite : RuntimeLibcallImpl<LOG2_FINITE_F32>;
 def __log2_finite : RuntimeLibcallImpl<LOG2_FINITE_F64>;
@@ -797,7 +799,7 @@ def __log2l_finite_ppcf128 : RuntimeLibcallImpl<LOG2_FINITE_PPCF128, "__log2l_fi
 
 def log10f : RuntimeLibcallImpl<LOG10_F32>;
 def log10 : RuntimeLibcallImpl<LOG10_F64>;
-defm log10 : LibmLongDoubleLibCall;
+defm log10l : LibmLongDoubleLibCall;
 
 def __log10f_finite : RuntimeLibcallImpl<LOG10_FINITE_F32>;
 def __log10_finite : RuntimeLibcallImpl<LOG10_FINITE_F64>;
@@ -807,7 +809,7 @@ def __log10l_finite_ppcf128 : RuntimeLibcallImpl<LOG10_FINITE_PPCF128, "__log10l
 
 def expf : RuntimeLibcallImpl<EXP_F32>;
 def exp : RuntimeLibcallImpl<EXP_F64>;
-defm exp : LibmLongDoubleLibCall<"EXP", "exp">;
+defm expl : LibmLongDoubleLibCall<"EXP">;
 
 def __expf_finite : RuntimeLibcallImpl<EXP_FINITE_F32>;
 def __exp_finite : RuntimeLibcallImpl<EXP_FINITE_F64>;
@@ -817,7 +819,7 @@ def __expl_finite_ppcf128 : RuntimeLibcallImpl<EXP_FINITE_PPCF128, "__expl_finit
 
 def exp2f : RuntimeLibcallImpl<EXP2_F32>;
 def exp2 : RuntimeLibcallImpl<EXP2_F64>;
-defm exp2 : LibmLongDoubleLibCall<"EXP2", "exp2">;
+defm exp2l : LibmLongDoubleLibCall<"EXP2">;
 
 def __exp2f_finite : RuntimeLibcallImpl<EXP2_FINITE_F32>;
 def __exp2_finite : RuntimeLibcallImpl<EXP2_FINITE_F64>;
@@ -827,47 +829,47 @@ def __exp2l_finite_ppcf128 : RuntimeLibcallImpl<EXP2_FINITE_PPCF128, "__exp2l_fi
 
 def sinf : RuntimeLibcallImpl<SIN_F32>;
 def sin : RuntimeLibcallImpl<SIN_F64>;
-defm sin : LibmLongDoubleLibCall;
+defm sinl : LibmLongDoubleLibCall;
 
 def cosf : RuntimeLibcallImpl<COS_F32>;
 def cos : RuntimeLibcallImpl<COS_F64>;
-defm cos : LibmLongDoubleLibCall;
+defm cosl : LibmLongDoubleLibCall;
 
 def tanf : RuntimeLibcallImpl<TAN_F32>;
 def tan : RuntimeLibcallImpl<TAN_F64>;
-defm tan : LibmLongDoubleLibCall;
+defm tanl : LibmLongDoubleLibCall;
 
 def sinhf : RuntimeLibcallImpl<SINH_F32>;
 def sinh : RuntimeLibcallImpl<SINH_F64>;
-defm sinh : LibmLongDoubleLibCall;
+defm sinhl : LibmLongDoubleLibCall;
 
 def coshf : RuntimeLibcallImpl<COSH_F32>;
 def cosh : RuntimeLibcallImpl<COSH_F64>;
-defm cosh : LibmLongDoubleLibCall;
+defm coshl : LibmLongDoubleLibCall;
 
 def tanhf : RuntimeLibcallImpl<TANH_F32>;
 def tanh : RuntimeLibcallImpl<TANH_F64>;
-defm tanh : LibmLongDoubleLibCall;
+defm tanhl : LibmLongDoubleLibCall;
 
 def asinf : RuntimeLibcallImpl<ASIN_F32>;
 def asin : RuntimeLibcallImpl<ASIN_F64>;
-defm asin : LibmLongDoubleLibCall;
+defm asinl : LibmLongDoubleLibCall;
 
 def acosf : RuntimeLibcallImpl<ACOS_F32>;
 def acos : RuntimeLibcallImpl<ACOS_F64>;
-defm acos : LibmLongDoubleLibCall;
+defm acosl : LibmLongDoubleLibCall;
 
 def atanf : RuntimeLibcallImpl<ATAN_F32>;
 def atan : RuntimeLibcallImpl<ATAN_F64>;
-defm atan : LibmLongDoubleLibCall;
+defm atanl : LibmLongDoubleLibCall;
 
 def atan2f : RuntimeLibcallImpl<ATAN2_F32>;
 def atan2 : RuntimeLibcallImpl<ATAN2_F64>;
-defm atan2 : LibmLongDoubleLibCall;
+defm atan2l : LibmLongDoubleLibCall;
 
 def powf : RuntimeLibcallImpl<POW_F32>;
 def pow : RuntimeLibcallImpl<POW_F64>;
-defm pow : LibmLongDoubleLibCall;
+defm powl : LibmLongDoubleLibCall;
 
 def __powf_finite : RuntimeLibcallImpl<POW_FINITE_F32>;
 def __pow_finite : RuntimeLibcallImpl<POW_FINITE_F64>;
@@ -877,91 +879,91 @@ def __powl_finite_ppcf128 : RuntimeLibcallImpl<POW_FINITE_PPCF128, "__powl_finit
 
 def ceilf : RuntimeLibcallImpl<CEIL_F32>;
 def ceil : RuntimeLibcallImpl<CEIL_F64>;
-defm ceil : LibmLongDoubleLibCall;
+defm ceill : LibmLongDoubleLibCall;
 
 def truncf : RuntimeLibcallImpl<TRUNC_F32>;
 def trunc : RuntimeLibcallImpl<TRUNC_F64>;
-defm trunc : LibmLongDoubleLibCall;
+defm truncl : LibmLongDoubleLibCall;
 
 def rintf : RuntimeLibcallImpl<RINT_F32>;
 def rint : RuntimeLibcallImpl<RINT_F64>;
-defm rint : LibmLongDoubleLibCall;
+defm rintl : LibmLongDoubleLibCall;
 
 def nearbyintf : RuntimeLibcallImpl<NEARBYINT_F32>;
 def nearbyint : RuntimeLibcallImpl<NEARBYINT_F64>;
-defm nearbyint : LibmLongDoubleLibCall;
+defm nearbyintl : LibmLongDoubleLibCall;
 
 def roundf : RuntimeLibcallImpl<ROUND_F32>;
 def round : RuntimeLibcallImpl<ROUND_F64>;
-defm round : LibmLongDoubleLibCall;
+defm roundl : LibmLongDoubleLibCall;
 
 def roundevenf : RuntimeLibcallImpl<ROUNDEVEN_F32>;
 def roundeven : RuntimeLibcallImpl<ROUNDEVEN_F64>;
-defm roundeven : LibmLongDoubleLibCall;
+defm roundevenl : LibmLongDoubleLibCall;
 
 def floorf : RuntimeLibcallImpl<FLOOR_F32>;
 def floor : RuntimeLibcallImpl<FLOOR_F64>;
-defm floor : LibmLongDoubleLibCall;
+defm floorl : LibmLongDoubleLibCall;
 
 def copysignf : RuntimeLibcallImpl<COPYSIGN_F32>;
 def copysign : RuntimeLibcallImpl<COPYSIGN_F64>;
-defm copysign : LibmLongDoubleLibCall;
+defm copysignl : LibmLongDoubleLibCall;
 
 def fminf : RuntimeLibcallImpl<FMIN_F32>;
 def fmin : RuntimeLibcallImpl<FMIN_F64>;
-defm fmin : LibmLongDoubleLibCall;
+defm fminl : LibmLongDoubleLibCall;
 
 def fmaxf : RuntimeLibcallImpl<FMAX_F32>;
 def fmax : RuntimeLibcallImpl<FMAX_F64>;
-defm fmax : LibmLongDoubleLibCall;
+defm fmaxl : LibmLongDoubleLibCall;
 
 def fminimumf : RuntimeLibcallImpl<FMINIMUM_F32>;
 def fminimum : RuntimeLibcallImpl<FMINIMUM_F64>;
-defm fminimum : LibmLongDoubleLibCall;
+defm fminimuml : LibmLongDoubleLibCall;
 
 def fmaximumf : RuntimeLibcallImpl<FMAXIMUM_F32>;
 def fmaximum : RuntimeLibcallImpl<FMAXIMUM_F64>;
-defm fmaximum : LibmLongDoubleLibCall;
+defm fmaximuml : LibmLongDoubleLibCall;
 
 def fminimum_numf : RuntimeLibcallImpl<FMINIMUM_NUM_F32>;
 def fminimum_num : RuntimeLibcallImpl<FMINIMUM_NUM_F64>;
-defm fminimum_num : LibmLongDoubleLibCall;
+defm fminimum_numl : LibmLongDoubleLibCall;
 
 def fmaximum_numf : RuntimeLibcallImpl<FMAXIMUM_NUM_F32>;
 def fmaximum_num : RuntimeLibcallImpl<FMAXIMUM_NUM_F64>;
-defm fmaximum_num : LibmLongDoubleLibCall;
+defm fmaximum_numl : LibmLongDoubleLibCall;
 
 def lroundf : RuntimeLibcallImpl<LROUND_F32>;
 def lround : RuntimeLibcallImpl<LROUND_F64>;
-defm lround : LibmLongDoubleLibCall;
+defm lroundl : LibmLongDoubleLibCall;
 
 def llroundf : RuntimeLibcallImpl<LLROUND_F32>;
 def llround : RuntimeLibcallImpl<LLROUND_F64>;
-defm llround : LibmLongDoubleLibCall;
+defm llroundl : LibmLongDoubleLibCall;
 
 def lrintf : RuntimeLibcallImpl<LRINT_F32>;
 def lrint : RuntimeLibcallImpl<LRINT_F64>;
-defm lrint : LibmLongDoubleLibCall;
+defm lrintl : LibmLongDoubleLibCall;
 
 def llrintf : RuntimeLibcallImpl<LLRINT_F32>;
 def llrint : RuntimeLibcallImpl<LLRINT_F64>;
-defm llrint : LibmLongDoubleLibCall;
+defm llrintl : LibmLongDoubleLibCall;
 
 def ldexpf : RuntimeLibcallImpl<LDEXP_F32>;
 def ldexp : RuntimeLibcallImpl<LDEXP_F64>;
-defm ldexp : LibmLongDoubleLibCall;
+defm ldexpl : LibmLongDoubleLibCall;
 
 def frexpf : RuntimeLibcallImpl<FREXP_F32>;
 def frexp : RuntimeLibcallImpl<FREXP_F64>;
-defm frexp : LibmLongDoubleLibCall;
+defm frexpl : LibmLongDoubleLibCall;
 
 def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
 def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
-defm sincospi : LibmLongDoubleLibCall;
+defm sincospil : LibmLongDoubleLibCall;
 
 def modff : RuntimeLibcallImpl<MODF_F32>;
 def modf : RuntimeLibcallImpl<MODF_F64>;
-defm modf : LibmLongDoubleLibCall;
+defm modfl : LibmLongDoubleLibCall;
 
 // Floating point environment
 def fegetenv : RuntimeLibcallImpl<FEGETENV>;
@@ -1033,7 +1035,7 @@ def __sincos_stret : RuntimeLibcallImpl<SINCOS_STRET_F64>;
 
 def sincosf : RuntimeLibcallImpl<SINCOS_F32>;
 def sincos : RuntimeLibcallImpl<SINCOS_F64>;
-defm sincos : LibmLongDoubleLibCall;
+defm sincosl : LibmLongDoubleLibCall;
 
 def bzero : RuntimeLibcallImpl<BZERO>;
 def __bzero : RuntimeLibcallImpl<BZERO>;
@@ -1198,9 +1200,9 @@ defvar SecurityCheckCookieIfWinMSVC =
 
 defvar LibmHasSinCosF32 = LibcallImpls<(add sincosf), hasSinCos>;
 defvar LibmHasSinCosF64 =  LibcallImpls<(add sincos), hasSinCos>;
-defvar LibmHasSinCosF80 = LibcallImpls<(add sincos_f80), hasSinCos>;
-defvar LibmHasSinCosF128 = LibcallImpls<(add sincos_f128), hasSinCos>;
-defvar LibmHasSinCosPPCF128 = LibcallImpls<(add sincos_ppcf128), hasSinCos>;
+defvar LibmHasSinCosF80 = LibcallImpls<(add sincosl_f80), hasSinCos>;
+defvar LibmHasSinCosF128 = LibcallImpls<(add sincosl_f128), hasSinCos>;
+defvar LibmHasSinCosPPCF128 = LibcallImpls<(add sincosl_ppcf128), hasSinCos>;
 
 defvar LibmHasExp10F32 = LibcallImpls<(add exp10f), hasExp10>;
 defvar LibmHasExp10F64 = LibcallImpls<(add exp10), hasExp10>;
@@ -1214,8 +1216,8 @@ defvar DefaultLibmExp10 = [
 
 
 defvar WindowsMathRemovals = [
-  ldexpf, ldexp_f80, ldexp_f128, ldexp_ppcf128,
-  frexpf, frexp_f80, frexp_f128, frexp_ppcf128
+  ldexpf, ldexpl_f80, ldexpl_f128, ldexpl_ppcf128,
+  frexpf, frexpl_f80, frexpl_f128, frexpl_ppcf128
 ];
 
 defvar MostPowI = !listremove(PowiLibcallImpls, [__powitf2_f128, __powitf2_ppc128]);
@@ -1233,11 +1235,11 @@ defvar WinDefaultLibcallImpls = (add WinDefaultLibcallImplsBaseList,
 defvar LibmHasFrexpF32 = LibcallImpls<(add frexpf), isNotOSWindowsOrIsCygwinMinGW>;
 defvar LibmHasLdexpF32 = LibcallImpls<(add ldexpf), isNotOSWindowsOrIsCygwinMinGW>;
 
-defvar LibmHasFrexpF80 = LibcallImpls<(add frexp_f80), isNotOSWindowsOrIsCygwinMinGW>;
-defvar LibmHasLdexpF80 = LibcallImpls<(add ldexp_f80), isNotOSWindowsOrIsCygwinMinGW>;
+defvar LibmHasFrexpF80 = LibcallImpls<(add frexpl_f80), isNotOSWindowsOrIsCygwinMinGW>;
+defvar LibmHasLdexpF80 = LibcallImpls<(add ldexpl_f80), isNotOSWindowsOrIsCygwinMinGW>;
 
-defvar LibmHasFrexpF128 = LibcallImpls<(add frexp_f128), isNotOSWindowsOrIsCygwinMinGW>;
-defvar LibmHasLdexpF128 = LibcallImpls<(add ldexp_f128), isNotOSWindowsOrIsCygwinMinGW>;
+defvar LibmHasFrexpF128 = LibcallImpls<(add frexpl_f128), isNotOSWindowsOrIsCygwinMinGW>;
+defvar LibmHasLdexpF128 = LibcallImpls<(add ldexpl_f128), isNotOSWindowsOrIsCygwinMinGW>;
 
 defvar has__stack_chk_fail = LibcallImpls<(add __stack_chk_fail), isNotOSOpenBSD>;
 defvar has__stack_chk_guard =
@@ -2459,7 +2461,7 @@ defvar X86CommonLibcalls =
        LibcallImpls<(add __bzero), darwinHas__bzero>,
        LibmHasFrexpF32, LibmHasLdexpF32,
        LibmHasFrexpF80, LibmHasLdexpF80,
-       LibcallImpls<(add frexp_f128, ldexp_f128, exp10l_f128), hasExpFrexplLdexplF128>,
+       LibcallImpls<(add frexpl_f128, ldexpl_f128, exp10l_f128), hasExpFrexplLdexplF128>,
        DefaultRuntimeLibcallImpls_f80,
        LibmHasExp10F32, LibmHasExp10F64, LibmHasExp10F80,
        LibcallImpls<(add MostPowI), isNotOSMSVCRT>,
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index d460eb1..1617ae7 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -13,6 +13,7 @@
 #define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/Debuginfod/BuildIDFetcher.h"
 #include "llvm/Object/BuildID.h"
 #include "llvm/ProfileData/InstrProf.h"
@@ -24,7 +25,6 @@
 #include <vector>
 
 namespace llvm {
-class DWARFContext;
 class DWARFDie;
 namespace object {
 class ObjectFile;
diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
index 7fd9bef..cebf071 100644
--- a/llvm/include/llvm/Support/Caching.h
+++ b/llvm/include/llvm/Support/Caching.h
@@ -17,11 +17,10 @@
 
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 
-class MemoryBuffer;
-
 /// This class wraps an output stream for a file. Most clients should just be
 /// able to return an instance of this base class from the stream callback, but
 /// if a client needs to perform some action after the stream is written to,
diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h
index 7025ca14..fd67d7a 100644
--- a/llvm/include/llvm/Support/DebugLog.h
+++ b/llvm/include/llvm/Support/DebugLog.h
@@ -221,12 +221,10 @@ constexpr ::llvm::StringRef strip_quotes(const char *Str) {
 #define LDBG_GET_DEBUG_TYPE_STR() LDBG_GET_DEBUG_TYPE_STR_(DEBUG_TYPE)
 
 /// Helper to call isCurrentDebugType with a StringRef.
-static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(StringRef Type,
-                                                         int Level) {
+[[maybe_unused]] static bool ldbgIsCurrentDebugType(StringRef Type, int Level) {
   return ::llvm::isCurrentDebugType(Type.str().c_str(), Level);
 }
-static LLVM_ATTRIBUTE_UNUSED bool ldbgIsCurrentDebugType(int Level,
-                                                         StringRef Type) {
+[[maybe_unused]] static bool ldbgIsCurrentDebugType(int Level, StringRef Type) {
   return ::llvm::isCurrentDebugType(Type.str().c_str(), Level);
 }
 
@@ -302,7 +300,7 @@ public:
 };
 
 /// Remove the path prefix from the file name.
-static LLVM_ATTRIBUTE_UNUSED constexpr const char *
+[[maybe_unused]] static constexpr const char *
 getShortFileName(const char *path) {
   const char *filename = path;
   for (const char *p = path; *p != '\0'; ++p) {
@@ -315,7 +313,7 @@ getShortFileName(const char *path) {
 /// Compute the prefix for the debug log in the form of:
 /// "[DebugType] File:Line "
 /// Where the File is the file name without the path prefix.
-static LLVM_ATTRIBUTE_UNUSED std::string
+[[maybe_unused]] static std::string
 computePrefix(StringRef DebugType, const char *File, int Line, int Level) {
   std::string Prefix;
   raw_string_ostream OsPrefix(Prefix);
@@ -326,7 +324,7 @@ computePrefix(StringRef DebugType, const char *File, int Line, int Level) {
   return OsPrefix.str();
 }
 /// Overload allowing to swap the order of the DebugType and Level arguments.
-static LLVM_ATTRIBUTE_UNUSED std::string
+[[maybe_unused]] static std::string
 computePrefix(int Level, const char *File, int Line, StringRef DebugType) {
   return computePrefix(DebugType, File, Line, Level);
 }
diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h
index 5637b64..8320006 100644
--- a/llvm/include/llvm/Support/SourceMgr.h
+++ b/llvm/include/llvm/Support/SourceMgr.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_SUPPORT_SOURCEMGR_H
 #define LLVM_SUPPORT_SOURCEMGR_H
 
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -23,6 +24,10 @@
 
 namespace llvm {
 
+namespace vfs {
+class FileSystem;
+} // end namespace vfs
+
 class raw_ostream;
 class SMDiagnostic;
 class SMFixIt;
@@ -91,15 +96,25 @@ private:
   DiagHandlerTy DiagHandler = nullptr;
   void *DiagContext = nullptr;
 
+  // Optional file system for finding include files.
+  IntrusiveRefCntPtr<vfs::FileSystem> FS;
+
   bool isValidBufferID(unsigned i) const { return i && i <= Buffers.size(); }
 
 public:
-  SourceMgr() = default;
+  /// Create new source manager without support for include files.
+  SourceMgr();
+  /// Create new source manager with the capability of finding include files
+  /// via the provided file system.
+  explicit SourceMgr(IntrusiveRefCntPtr<vfs::FileSystem> FS);
   SourceMgr(const SourceMgr &) = delete;
   SourceMgr &operator=(const SourceMgr &) = delete;
-  SourceMgr(SourceMgr &&) = default;
-  SourceMgr &operator=(SourceMgr &&) = default;
-  ~SourceMgr() = default;
+  SourceMgr(SourceMgr &&);
+  SourceMgr &operator=(SourceMgr &&);
+  ~SourceMgr();
+
+  IntrusiveRefCntPtr<vfs::FileSystem> getVirtualFileSystem() const;
+  void setVirtualFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS);
 
   /// Return the include directories of this source manager.
   ArrayRef<std::string> getIncludeDirs() const { return IncludeDirectories; }
diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h
index ce91f62..e22c6d4 100644
--- a/llvm/include/llvm/TableGen/CodeGenHelpers.h
+++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h
@@ -50,19 +50,21 @@ private:
 };
 
 // Simple RAII helper for emitting namespace scope. Name can be a single
-// namespace (empty for anonymous namespace) or nested namespace.
+// namespace or nested namespace. If the name is empty, will not generate any
+// namespace scope.
 class NamespaceEmitter {
 public:
-  NamespaceEmitter(raw_ostream &OS, StringRef Name)
-      : Name(trim(Name).str()), OS(OS) {
-    OS << "namespace " << this->Name << " {\n";
+  NamespaceEmitter(raw_ostream &OS, StringRef NameUntrimmed)
+      : Name(trim(NameUntrimmed).str()), OS(OS) {
+    if (!Name.empty())
+      OS << "namespace " << Name << " {\n";
   }
 
   ~NamespaceEmitter() { close(); }
 
   // Explicit function to close the namespace scopes.
   void close() {
-    if (!Closed)
+    if (!Closed && !Name.empty())
       OS << "} // namespace " << Name << "\n";
     Closed = true;
   }
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index e62aa6d..254587b 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -115,6 +115,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "meteorlake")
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_SAPPHIRERAPIDS, "emeraldrapids")
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ARROWLAKE_S,"lunarlake")
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont")
+X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_PANTHERLAKE, "wildcatlake")
 
 #undef X86_CPU_SUBTYPE_ALIAS
 #undef X86_CPU_SUBTYPE
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index f6aeaad..e4c43cd 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -116,6 +116,7 @@ enum CPUKind {
   CK_ArrowlakeS,
   CK_Lunarlake,
   CK_Pantherlake,
+  CK_Wildcatlake,
   CK_Sierraforest,
   CK_Grandridge,
   CK_Graniterapids,
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index f90717d..1d1a5560 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -61,6 +61,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
 static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
                                           cl::Hidden, cl::init(""));
 
+static cl::opt<bool> StopImmediatelyForTest("ml-inliner-stop-immediately",
+                                            cl::Hidden);
+
 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
@@ -214,6 +217,7 @@ MLInlineAdvisor::MLInlineAdvisor(
     return;
   }
   ModelRunner->switchContext("");
+  ForceStop = StopImmediatelyForTest;
 }
 
 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
@@ -379,9 +383,17 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
 
   if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) {
-    if (!PSI.isFunctionEntryCold(&Caller))
-      return std::make_unique<InlineAdvice>(this, CB, ORE,
-                                            GetDefaultAdvice(CB));
+    if (!PSI.isFunctionEntryCold(&Caller)) {
+      // Return a MLInlineAdvice, despite delegating to the default advice,
+      // because we need to keep track of the internal state. This is different
+      // from the other instances where we return a "default" InlineAdvice,
+      // which happen at points we won't come back to the MLAdvisor for
+      // decisions requiring that state.
+      return ForceStop ? std::make_unique<InlineAdvice>(this, CB, ORE,
+                                                        GetDefaultAdvice(CB))
+                       : std::make_unique<MLInlineAdvice>(this, CB, ORE,
+                                                          GetDefaultAdvice(CB));
+    }
   }
   auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
   // If this is a "never inline" case, there won't be any changes to internal
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index e7f0b2c..61d4935 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -1,10 +1,14 @@
 #include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/ProfileData/InstrProf.h"
 
+#define DEBUG_TYPE "static-data-profile-info"
+
 using namespace llvm;
 
 namespace llvm {
@@ -79,6 +83,17 @@ StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
   return StaticDataHotness::LukewarmOrUnknown;
 }
 
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getSectionHotnessUsingDataAccessProfile(
+    std::optional<StringRef> MaybeSectionPrefix) const {
+  if (!MaybeSectionPrefix)
+    return StaticDataHotness::LukewarmOrUnknown;
+  StringRef Prefix = *MaybeSectionPrefix;
+  assert((Prefix == "hot" || Prefix == "unlikely") &&
+         "Expect section_prefix to be one of hot or unlikely");
+  return Prefix == "hot" ? StaticDataHotness::Hot : StaticDataHotness::Cold;
+}
+
 StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
   switch (Hotness) {
   case StaticDataHotness::Cold:
@@ -101,13 +116,66 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
 StringRef StaticDataProfileInfo::getConstantSectionPrefix(
     const Constant *C, const ProfileSummaryInfo *PSI) const {
   std::optional<uint64_t> Count = getConstantProfileCount(C);
+
+#ifndef NDEBUG
+  auto DbgPrintPrefix = [](StringRef Prefix) {
+    return Prefix.empty() ? "<empty>" : Prefix;
+  };
+#endif
+
+  if (EnableDataAccessProf) {
+    // Module flag `HasDataAccessProf` is 1 -> empty section prefix means
+    // unknown hotness except for string literals.
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C);
+        GV && llvm::memprof::IsAnnotationOK(*GV) &&
+        !GV->getName().starts_with(".str")) {
+      auto HotnessFromDataAccessProf =
+          getSectionHotnessUsingDataAccessProfile(GV->getSectionPrefix());
+
+      if (!Count) {
+        StringRef Prefix = hotnessToStr(HotnessFromDataAccessProf);
+        LLVM_DEBUG(dbgs() << GV->getName() << " has section prefix "
+                          << DbgPrintPrefix(Prefix)
+                          << ", solely from data access profiles\n");
+        return Prefix;
+      }
+
+      // Both data access profiles and PGO counters are available. Use the
+      // hotter one.
+      auto HotnessFromPGO = getConstantHotnessUsingProfileCount(C, PSI, *Count);
+      StaticDataHotness GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+      if (HotnessFromDataAccessProf == StaticDataHotness::Hot ||
+          HotnessFromPGO == StaticDataHotness::Hot) {
+        GlobalVarHotness = StaticDataHotness::Hot;
+      } else if (HotnessFromDataAccessProf ==
+                     StaticDataHotness::LukewarmOrUnknown ||
+                 HotnessFromPGO == StaticDataHotness::LukewarmOrUnknown) {
+        GlobalVarHotness = StaticDataHotness::LukewarmOrUnknown;
+      } else {
+        GlobalVarHotness = StaticDataHotness::Cold;
+      }
+      StringRef Prefix = hotnessToStr(GlobalVarHotness);
+      LLVM_DEBUG(
+          dbgs() << GV->getName() << " has section prefix "
+                 << DbgPrintPrefix(Prefix)
+                 << ", the max from data access profiles as "
+                 << DbgPrintPrefix(hotnessToStr(HotnessFromDataAccessProf))
+                 << " and PGO counters as "
+                 << DbgPrintPrefix(hotnessToStr(HotnessFromPGO)) << "\n");
+      return Prefix;
+    }
+  }
   if (!Count)
     return "";
   return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
 }
 
 bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
-  Info.reset(new StaticDataProfileInfo());
+  bool EnableDataAccessProf = false;
+  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+          M.getModuleFlag("EnableDataAccessProf")))
+    EnableDataAccessProf = MD->getZExtValue();
+  Info.reset(new StaticDataProfileInfo(EnableDataAccessProf));
   return false;
 }
 
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 1dd470b..cf63285 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1270,7 +1270,7 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, unsigned NameID,
       if (parseToken(lltok::StringConstant, "expected partition string"))
         return true;
     } else if (!IsAlias && Lex.getKind() == lltok::MetadataVar) {
-      if (parseGlobalObjectMetadataAttachment(*GI.get()))
+      if (parseGlobalObjectMetadataAttachment(*GI))
         return true;
     } else {
       return tokError("unknown alias or ifunc property!");
@@ -5876,6 +5876,7 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
   REQUIRED(file, MDField, (/* AllowNull */ false));                            \
   OPTIONAL(language, DwarfLangField, );                                        \
   OPTIONAL(sourceLanguageName, DwarfSourceLangNameField, );                    \
+  OPTIONAL(sourceLanguageVersion, MDUnsignedField, (0, UINT32_MAX));           \
   OPTIONAL(producer, MDStringField, );                                         \
   OPTIONAL(isOptimized, MDBoolField, );                                        \
   OPTIONAL(flags, MDStringField, );                                            \
@@ -5905,10 +5906,15 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
     return error(Loc, "can only specify one of 'language' and "
                       "'sourceLanguageName' on !DICompileUnit");
 
+  if (sourceLanguageVersion.Seen && !sourceLanguageName.Seen)
+    return error(Loc, "'sourceLanguageVersion' requires an associated "
+                      "'sourceLanguageName' on !DICompileUnit");
+
   Result = DICompileUnit::getDistinct(
       Context,
       language.Seen ? DISourceLanguageName(language.Val)
-                    : DISourceLanguageName(sourceLanguageName.Val, 0),
+                    : DISourceLanguageName(sourceLanguageName.Val,
+                                           sourceLanguageVersion.Val),
       file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val,
       splitDebugFilename.Val, emissionKind.Val, enums.Val, retainedTypes.Val,
       globals.Val, imports.Val, macros.Val, dwoId.Val, splitDebugInlining.Val,
diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp
index e0a4471..19d5b98 100644
--- a/llvm/lib/BinaryFormat/XCOFF.cpp
+++ b/llvm/lib/BinaryFormat/XCOFF.cpp
@@ -112,26 +112,26 @@ StringRef XCOFF::getNameForTracebackTableLanguageId(
 XCOFF::CFileCpuId XCOFF::getCpuID(StringRef CPUName) {
   StringRef CPU = PPC::normalizeCPUName(CPUName);
   return StringSwitch<XCOFF::CFileCpuId>(CPU)
-      .Cases("generic", "COM", XCOFF::TCPU_COM)
+      .Cases({"generic", "COM"}, XCOFF::TCPU_COM)
       .Case("601", XCOFF::TCPU_601)
-      .Cases("602", "603", "603e", "603ev", XCOFF::TCPU_603)
-      .Cases("604", "604e", XCOFF::TCPU_604)
+      .Cases({"602", "603", "603e", "603ev"}, XCOFF::TCPU_603)
+      .Cases({"604", "604e"}, XCOFF::TCPU_604)
       .Case("620", XCOFF::TCPU_620)
       .Case("970", XCOFF::TCPU_970)
-      .Cases("a2", "g3", "g4", "g5", "e500", XCOFF::TCPU_COM)
-      .Cases("pwr3", "pwr4", XCOFF::TCPU_COM)
-      .Cases("pwr5", "PWR5", XCOFF::TCPU_PWR5)
-      .Cases("pwr5x", "PWR5X", XCOFF::TCPU_PWR5X)
-      .Cases("pwr6", "PWR6", XCOFF::TCPU_PWR6)
-      .Cases("pwr6x", "PWR6E", XCOFF::TCPU_PWR6E)
-      .Cases("pwr7", "PWR7", XCOFF::TCPU_PWR7)
-      .Cases("pwr8", "PWR8", XCOFF::TCPU_PWR8)
-      .Cases("pwr9", "PWR9", XCOFF::TCPU_PWR9)
-      .Cases("pwr10", "PWR10", XCOFF::TCPU_PWR10)
-      .Cases("ppc", "PPC", "ppc32", "ppc64", XCOFF::TCPU_COM)
+      .Cases({"a2", "g3", "g4", "g5", "e500"}, XCOFF::TCPU_COM)
+      .Cases({"pwr3", "pwr4"}, XCOFF::TCPU_COM)
+      .Cases({"pwr5", "PWR5"}, XCOFF::TCPU_PWR5)
+      .Cases({"pwr5x", "PWR5X"}, XCOFF::TCPU_PWR5X)
+      .Cases({"pwr6", "PWR6"}, XCOFF::TCPU_PWR6)
+      .Cases({"pwr6x", "PWR6E"}, XCOFF::TCPU_PWR6E)
+      .Cases({"pwr7", "PWR7"}, XCOFF::TCPU_PWR7)
+      .Cases({"pwr8", "PWR8"}, XCOFF::TCPU_PWR8)
+      .Cases({"pwr9", "PWR9"}, XCOFF::TCPU_PWR9)
+      .Cases({"pwr10", "PWR10"}, XCOFF::TCPU_PWR10)
+      .Cases({"ppc", "PPC", "ppc32", "ppc64"}, XCOFF::TCPU_COM)
       .Case("ppc64le", XCOFF::TCPU_PWR8)
       .Case("future", XCOFF::TCPU_PWR10)
-      .Cases("any", "ANY", XCOFF::TCPU_ANY)
+      .Cases({"any", "ANY"}, XCOFF::TCPU_ANY)
       .Default(XCOFF::TCPU_INVALID);
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index cdcf7a8..ed0443f 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1860,7 +1860,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     break;
   }
   case bitc::METADATA_COMPILE_UNIT: {
-    if (Record.size() < 14 || Record.size() > 22)
+    if (Record.size() < 14 || Record.size() > 23)
       return error("Invalid record");
 
     // Ignore Record[0], which indicates whether this compile unit is
@@ -1869,11 +1869,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
 
     const auto LangVersionMask = (uint64_t(1) << 63);
     const bool HasVersionedLanguage = Record[1] & LangVersionMask;
+    const uint32_t LanguageVersion = Record.size() > 22 ? Record[22] : 0;
 
     auto *CU = DICompileUnit::getDistinct(
         Context,
         HasVersionedLanguage
-            ? DISourceLanguageName(Record[1] & ~LangVersionMask, 0)
+            ? DISourceLanguageName(Record[1] & ~LangVersionMask,
+                                   LanguageVersion)
             : DISourceLanguageName(Record[1]),
         getMDOrNull(Record[2]), getMDString(Record[3]), Record[4],
         getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8],
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 54e916e..8ff3aa9 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2142,6 +2142,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
   Record.push_back(N->getRangesBaseAddress());
   Record.push_back(VE.getMetadataOrNullID(N->getRawSysRoot()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawSDK()));
+  Record.push_back(Lang.hasVersionedName() ? Lang.getVersion() : 0);
 
   Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
   Record.clear();
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
index 323b21e..4e6f93e 100644
--- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -1102,8 +1102,6 @@ void TrieRawHashMapHandle::print(
 
   if (auto Err = Printer.printRecords())
     OS << "error: " << toString(std::move(Err)) << "\n";
-
-  return;
 }
 
 Error TrieRawHashMapHandle::validate(
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 05fffe9..e2af0c5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,6 +119,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/VCSRevision.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -476,6 +477,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
+  VFS = vfs::getRealFileSystem();
   auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
   MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
   HasSplitStack = false;
@@ -1683,7 +1685,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) {
   return nullptr;
 }
 
-/// Emits .callgraph section.
+/// Emits .llvm.callgraph section.
 void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
                                       FunctionCallGraphInfo &FuncCGInfo) {
   if (!MF.getTarget().Options.EmitCallGraphSection)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c364ffc..8dd8b9da 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
@@ -98,6 +99,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
   unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode);
   SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager();
   SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+  SrcMgr.setVirtualFileSystem(VFS);
 
   std::unique_ptr<MCAsmParser> Parser(
       createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 2d50167..fae952e 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -491,6 +491,20 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
       return true;
     }
     if (FBB) {
+      // If we get here with a MBB which ends like this:
+      //
+      // bb.1:
+      // successors: %bb.2;
+      // ...
+      // BNE $x1, $x0, %bb.2
+      // PseudoBR %bb.2
+      //
+      // Just remove conditional branch.
+      if (TBB == FBB) {
+        removeBranch(MBB);
+        insertUncondBranch(MBB, TBB);
+        return true;
+      }
       // We need to split the basic block here to obtain two long-range
       // unconditional branches.
       NewBB = createNewBlockAfter(*MBB);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4320b1d..9e78ec9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -819,7 +819,7 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
 }
 
 // Verify BFI has been updated correctly by recomputing BFI and comparing them.
-void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
+[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
   DominatorTree NewDT(F);
   LoopInfo NewLI(NewDT);
   BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 25c1db9..ded4df4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
 }
 
 LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
-                                                         LLT NewEltTy) {
+                                                         ElementCount EC) {
   return [=](const LegalityQuery &Query) {
     const LLT OldTy = Query.Types[TypeIdx];
-    ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
-                                                   : ElementCount::getFixed(1);
-    return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+    return std::make_pair(TypeIdx, OldTy.changeElementCount(EC));
   };
 }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index cffaf7c..38ec83f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3292,8 +3292,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     if (TypeIdx != 2)
       return UnableToLegalize;
     Observer.changingInstr(MI);
-    // TODO: Probably should be zext
-    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
     Observer.changedInstr(MI);
     return Legalized;
   }
@@ -3325,8 +3324,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
 
     if (TypeIdx == 2) {
       Observer.changingInstr(MI);
-      // TODO: Probably should be zext
-      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
       Observer.changedInstr(MI);
       return Legalized;
     }
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 055fdc6..ca82857 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
   if (!DefMI)
     return false;
 
-  const TargetMachine& TM = DefMI->getMF()->getTarget();
-  if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
+  if (DefMI->getFlag(MachineInstr::FmNoNans))
     return true;
 
   // If the value is a constant, we can obviously see if it is a NaN or not.
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index e359831..ea08365 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1257,7 +1257,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
   Tracker.clear();
 }
 
-static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+[[maybe_unused]] static void printSpillReloadChain(
     DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
     DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
     MachineInstr *Leader) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 358e060..c97300d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17759,7 +17759,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
-  const TargetOptions &Options = DAG.getTarget().Options;
   SDNodeFlags Flags = N->getFlags();
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
 
@@ -17825,7 +17824,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   bool AllowNewConst = (Level < AfterLegalizeDAG);
 
   // If nnan is enabled, fold lots of things.
-  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+  if (Flags.hasNoNaNs() && AllowNewConst) {
     // If allowed, fold (fadd (fneg x), x) -> 0.0
     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
       return DAG.getConstantFP(0.0, DL, VT);
@@ -17974,7 +17973,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
-  const TargetOptions &Options = DAG.getTarget().Options;
   const SDNodeFlags Flags = N->getFlags();
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
 
@@ -18002,7 +18000,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
 
   if (N0 == N1) {
     // (fsub x, x) -> 0.0
-    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
+    if (Flags.hasNoNaNs())
       return DAG.getConstantFP(0.0f, DL, VT);
   }
 
@@ -18313,7 +18311,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
   ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
-  const TargetOptions &Options = DAG.getTarget().Options;
   // FMA nodes have flags that propagate to the created nodes.
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
   MatchContextClass matcher(DAG, TLI, N);
@@ -18339,8 +18336,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
   }
 
-  if ((Options.NoNaNsFPMath && N->getFlags().hasNoInfs()) ||
-      (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
+  if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
     if (N->getFlags().hasNoSignedZeros() ||
         (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
       if (N0CFP && N0CFP->isZero())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0f2b518..cb0038c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3971,8 +3971,14 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
 }
 
 void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
-  // FIXME: this is not correct for pointers with addr width != pointer width
-  visitPtrToInt(I);
+  SDValue N = getValue(I.getOperand(0));
+  // By definition the type of the ptrtoaddr must be equal to the address type.
+  const auto &TLI = DAG.getTargetLoweringInfo();
+  EVT AddrVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  // The address width must be smaller or equal to the pointer representation
+  // width, so we lower ptrtoaddr as a truncate (possibly folded to a no-op).
+  N = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), AddrVT, N);
+  setValue(&I, N);
 }
 
 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 6610eef..c61f757 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -181,8 +181,8 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch,
 
 DWARFDebugFrame::~DWARFDebugFrame() = default;
 
-static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
-                                              uint64_t Offset, int Length) {
+[[maybe_unused]] static void dumpDataAux(DataExtractor Data, uint64_t Offset,
+                                         int Length) {
   errs() << "DUMP: ";
   for (int i = 0; i < Length; ++i) {
     uint8_t c = Data.getU8(&Offset);
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
index 50e6b25..0833af7 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -57,16 +57,17 @@ public:
     std::swap(FR.Actions, G.allocActions());
 
     Parent.EPC.callSPSWrapperAsync<
-        rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
-        Parent.SAs.Finalize,
+        rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+        Parent.SAs.Initialize,
         [OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr](
-            Error SerializationErr, Error FinalizeErr) mutable {
+            Error SerializationErr,
+            Expected<ExecutorAddr> InitializeKey) mutable {
           // FIXME: Release abandoned alloc.
           if (SerializationErr) {
-            cantFail(std::move(FinalizeErr));
+            cantFail(InitializeKey.takeError());
             OnFinalize(std::move(SerializationErr));
-          } else if (FinalizeErr)
-            OnFinalize(std::move(FinalizeErr));
+          } else if (!InitializeKey)
+            OnFinalize(InitializeKey.takeError());
           else
             OnFinalize(FinalizedAlloc(AllocAddr));
         },
@@ -76,8 +77,8 @@ public:
   void abandon(OnAbandonedFunction OnAbandoned) override {
     // FIXME: Return memory to pool instead.
     Parent.EPC.callSPSWrapperAsync<
-        rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
-        Parent.SAs.Deallocate,
+        rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+        Parent.SAs.Release,
         [OnAbandoned = std::move(OnAbandoned)](Error SerializationErr,
                                                Error DeallocateErr) mutable {
           if (SerializationErr) {
@@ -123,9 +124,8 @@ void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD,
 
 void EPCGenericJITLinkMemoryManager::deallocate(
     std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
-  EPC.callSPSWrapperAsync<
-      rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
-      SAs.Deallocate,
+  EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
+      SAs.Release,
       [OnDeallocated = std::move(OnDeallocated)](Error SerErr,
                                                  Error DeallocErr) mutable {
         if (SerErr) {
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
index fec7062..cc72488 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -25,9 +25,9 @@ EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
   if (auto Err = EPC.getBootstrapSymbols(
           {{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
            {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
-           {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
-           {SAs.Deallocate,
-            rt::SimpleExecutorMemoryManagerDeallocateWrapperName},
+           {SAs.Initialize,
+            rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+           {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName},
            {SAs.RegisterEHFrame, rt::RegisterEHFrameSectionAllocActionName},
            {SAs.DeregisterEHFrame,
             rt::DeregisterEHFrameSectionAllocActionName}}))
@@ -48,7 +48,7 @@ EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() {
 
   Error Err = Error::success();
   if (auto Err2 = EPC.callSPSWrapper<
-                  rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+                  rt::SPSSimpleExecutorMemoryManagerReleaseSignature>(
           SAs.Reserve, Err, SAs.Instance, FinalizedAllocs)) {
     // FIXME: Report errors through EPC once that functionality is available.
     logAllUnhandledErrors(std::move(Err2), errs(), "");
@@ -267,10 +267,10 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
 
     // We'll also need to make an extra allocation for the eh-frame wrapper call
     // arguments.
-    Error FinalizeErr = Error::success();
+    Expected<ExecutorAddr> InitializeKey((ExecutorAddr()));
     if (auto Err = EPC.callSPSWrapper<
-                   rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
-            SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) {
+                   rt::SPSSimpleExecutorMemoryManagerInitializeSignature>(
+            SAs.Initialize, InitializeKey, SAs.Instance, std::move(FR))) {
       std::lock_guard<std::mutex> Lock(M);
       this->ErrMsg = toString(std::move(Err));
       dbgs() << "Serialization error: " << this->ErrMsg << "\n";
@@ -278,9 +278,9 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
         *ErrMsg = this->ErrMsg;
       return true;
     }
-    if (FinalizeErr) {
+    if (!InitializeKey) {
       std::lock_guard<std::mutex> Lock(M);
-      this->ErrMsg = toString(std::move(FinalizeErr));
+      this->ErrMsg = toString(InitializeKey.takeError());
       dbgs() << "Finalization error: " << this->ErrMsg << "\n";
       if (ErrMsg)
         *ErrMsg = this->ErrMsg;
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 26e8f53..cc99d3c 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -23,10 +23,12 @@ const char *SimpleExecutorMemoryManagerInstanceName =
     "__llvm_orc_SimpleExecutorMemoryManager_Instance";
 const char *SimpleExecutorMemoryManagerReserveWrapperName =
     "__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper";
-const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
-    "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
-const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
-    "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+const char *SimpleExecutorMemoryManagerInitializeWrapperName =
+    "__llvm_orc_SimpleExecutorMemoryManager_initialize_wrapper";
+const char *SimpleExecutorMemoryManagerDeinitializeWrapperName =
+    "__llvm_orc_SimpleExecutorMemoryManager_deinitialize_wrapper";
+const char *SimpleExecutorMemoryManagerReleaseWrapperName =
+    "__llvm_orc_SimpleExecutorMemoryManager_release_wrapper";
 
 const char *ExecutorSharedMemoryMapperServiceInstanceName =
     "__llvm_orc_ExecutorSharedMemoryMapperService_Instance";
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index 87d7578..dec1df7 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -216,9 +216,9 @@ SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) {
   if (auto Err = SREPC.getBootstrapSymbols(
           {{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName},
            {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
-           {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
-           {SAs.Deallocate,
-            rt::SimpleExecutorMemoryManagerDeallocateWrapperName}}))
+           {SAs.Initialize,
+            rt::SimpleExecutorMemoryManagerInitializeWrapperName},
+           {SAs.Release, rt::SimpleExecutorMemoryManagerReleaseWrapperName}}))
     return std::move(Err);
 
   return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs);
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
index 3cdffb8..fe881a1 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
 
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
 #include "llvm/Support/FormatVariadic.h"
 
@@ -18,166 +19,167 @@ namespace orc {
 namespace rt_bootstrap {
 
 SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() {
-  assert(Allocations.empty() && "shutdown not called?");
+  assert(Slabs.empty() && "shutdown not called?");
 }
 
-Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) {
+Expected<ExecutorAddr> SimpleExecutorMemoryManager::reserve(uint64_t Size) {
   std::error_code EC;
   auto MB = sys::Memory::allocateMappedMemory(
       Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
   if (EC)
     return errorCodeToError(EC);
   std::lock_guard<std::mutex> Lock(M);
-  assert(!Allocations.count(MB.base()) && "Duplicate allocation addr");
-  Allocations[MB.base()].Size = Size;
+  assert(!Slabs.count(MB.base()) && "Duplicate allocation addr");
+  Slabs[MB.base()].Size = Size;
   return ExecutorAddr::fromPtr(MB.base());
 }
 
-Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
-  ExecutorAddr Base(~0ULL);
+Expected<ExecutorAddr>
+SimpleExecutorMemoryManager::initialize(tpctypes::FinalizeRequest &FR) {
   std::vector<shared::WrapperFunctionCall> DeallocationActions;
-  size_t SuccessfulFinalizationActions = 0;
 
   if (FR.Segments.empty()) {
-    // NOTE: Finalizing nothing is currently a no-op. Should it be an error?
     if (FR.Actions.empty())
-      return Error::success();
+      return make_error<StringError>("Finalization request is empty",
+                                     inconvertibleErrorCode());
     else
       return make_error<StringError>("Finalization actions attached to empty "
                                      "finalization request",
                                      inconvertibleErrorCode());
   }
 
-  for (auto &Seg : FR.Segments)
-    Base = std::min(Base, Seg.Addr);
-
-  for (auto &ActPair : FR.Actions)
-    if (ActPair.Dealloc)
-      DeallocationActions.push_back(ActPair.Dealloc);
-
-  // Get the Allocation for this finalization.
-  size_t AllocSize = 0;
-  {
-    std::lock_guard<std::mutex> Lock(M);
-    auto I = Allocations.find(Base.toPtr<void *>());
-    if (I == Allocations.end())
-      return make_error<StringError>("Attempt to finalize unrecognized "
-                                     "allocation " +
-                                         formatv("{0:x}", Base.getValue()),
-                                     inconvertibleErrorCode());
-    AllocSize = I->second.Size;
-    I->second.DeallocationActions = std::move(DeallocationActions);
-  }
-  ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize);
-
-  // Bail-out function: this will run deallocation actions corresponding to any
-  // completed finalization actions, then deallocate memory.
-  auto BailOut = [&](Error Err) {
-    std::pair<void *, Allocation> AllocToDestroy;
-
-    // Get allocation to destroy.
-    {
-      std::lock_guard<std::mutex> Lock(M);
-      auto I = Allocations.find(Base.toPtr<void *>());
-
-      // Check for missing allocation (effective a double free).
-      if (I == Allocations.end())
-        return joinErrors(
-            std::move(Err),
-            make_error<StringError>("No allocation entry found "
-                                    "for " +
-                                        formatv("{0:x}", Base.getValue()),
-                                    inconvertibleErrorCode()));
-      AllocToDestroy = std::move(*I);
-      Allocations.erase(I);
-    }
+  ExecutorAddrRange RR(FR.Segments.front().Addr, FR.Segments.front().Addr);
 
-    // Run deallocation actions for all completed finalization actions.
-    while (SuccessfulFinalizationActions)
-      Err =
-          joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions]
-                                         .Dealloc.runWithSPSRetErrorMerged());
-
-    // Deallocate memory.
-    sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size);
-    if (auto EC = sys::Memory::releaseMappedMemory(MB))
-      Err = joinErrors(std::move(Err), errorCodeToError(EC));
-
-    return Err;
-  };
+  std::vector<sys::MemoryBlock> MBsToReset;
+  auto ResetMBs = make_scope_exit([&]() {
+    for (auto &MB : MBsToReset)
+      sys::Memory::protectMappedMemory(MB, sys::Memory::MF_READ |
+                                               sys::Memory::MF_WRITE);
+    sys::Memory::InvalidateInstructionCache(RR.Start.toPtr<void *>(),
+                                            RR.size());
+  });
 
   // Copy content and apply permissions.
   for (auto &Seg : FR.Segments) {
+    RR.Start = std::min(RR.Start, Seg.Addr);
+    RR.End = std::max(RR.End, Seg.Addr + Seg.Size);
 
     // Check segment ranges.
     if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size()))
-      return BailOut(make_error<StringError>(
+      return make_error<StringError>(
           formatv("Segment {0:x} content size ({1:x} bytes) "
                   "exceeds segment size ({2:x} bytes)",
                   Seg.Addr.getValue(), Seg.Content.size(), Seg.Size),
-          inconvertibleErrorCode()));
+          inconvertibleErrorCode());
     ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size);
-    if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd))
-      return BailOut(make_error<StringError>(
+    if (LLVM_UNLIKELY(Seg.Addr < RR.Start || SegEnd > RR.End))
+      return make_error<StringError>(
           formatv("Segment {0:x} -- {1:x} crosses boundary of "
                   "allocation {2:x} -- {3:x}",
-                  Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(),
-                  AllocEnd.getValue()),
-          inconvertibleErrorCode()));
+                  Seg.Addr, SegEnd, RR.Start, RR.End),
+          inconvertibleErrorCode());
 
     char *Mem = Seg.Addr.toPtr<char *>();
     if (!Seg.Content.empty())
       memcpy(Mem, Seg.Content.data(), Seg.Content.size());
     memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
     assert(Seg.Size <= std::numeric_limits<size_t>::max());
+
+    sys::MemoryBlock MB(Mem, Seg.Size);
     if (auto EC = sys::Memory::protectMappedMemory(
-            {Mem, static_cast<size_t>(Seg.Size)},
-            toSysMemoryProtectionFlags(Seg.RAG.Prot)))
-      return BailOut(errorCodeToError(EC));
+            MB, toSysMemoryProtectionFlags(Seg.RAG.Prot)))
+      return errorCodeToError(EC);
+
+    MBsToReset.push_back(MB);
+
     if ((Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec)
       sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
   }
 
-  // Run finalization actions.
-  for (auto &ActPair : FR.Actions) {
-    if (auto Err = ActPair.Finalize.runWithSPSRetErrorMerged())
-      return BailOut(std::move(Err));
-    ++SuccessfulFinalizationActions;
+  auto DeallocActions = runFinalizeActions(FR.Actions);
+  if (!DeallocActions)
+    return DeallocActions.takeError();
+
+  {
+    std::lock_guard<std::mutex> Lock(M);
+    auto Region = createRegionInfo(RR, "In initialize");
+    if (!Region)
+      return Region.takeError();
+    Region->DeallocActions = std::move(*DeallocActions);
   }
 
-  return Error::success();
+  // Successful initialization.
+  ResetMBs.release();
+
+  return RR.Start;
 }
 
-Error SimpleExecutorMemoryManager::deallocate(
-    const std::vector<ExecutorAddr> &Bases) {
-  std::vector<std::pair<void *, Allocation>> AllocPairs;
-  AllocPairs.reserve(Bases.size());
+Error SimpleExecutorMemoryManager::deinitialize(
+    const std::vector<ExecutorAddr> &InitKeys) {
+  Error Err = Error::success();
 
-  // Get allocation to destroy.
+  for (auto &KeyAddr : llvm::reverse(InitKeys)) {
+    std::vector<shared::WrapperFunctionCall> DeallocActions;
+    {
+      std::scoped_lock<std::mutex> Lock(M);
+      auto Slab = getSlabInfo(KeyAddr, "In deinitialize");
+      if (!Slab) {
+        Err = joinErrors(std::move(Err), Slab.takeError());
+        continue;
+      }
+
+      auto RI = getRegionInfo(*Slab, KeyAddr, "In deinitialize");
+      if (!RI) {
+        Err = joinErrors(std::move(Err), RI.takeError());
+        continue;
+      }
+
+      DeallocActions = std::move(RI->DeallocActions);
+    }
+
+    Err = joinErrors(std::move(Err),
+                     runDeallocActions(std::move(DeallocActions)));
+  }
+
+  return Err;
+}
+
+Error SimpleExecutorMemoryManager::release(
+    const std::vector<ExecutorAddr> &Bases) {
   Error Err = Error::success();
-  {
-    std::lock_guard<std::mutex> Lock(M);
-    for (auto &Base : Bases) {
-      auto I = Allocations.find(Base.toPtr<void *>());
-
-      // Check for missing allocation (effective a double free).
-      if (I != Allocations.end()) {
-        AllocPairs.push_back(std::move(*I));
-        Allocations.erase(I);
-      } else
+
+  // TODO: Prohibit new initializations within the slabs being removed?
+  for (auto &Base : llvm::reverse(Bases)) {
+    std::vector<shared::WrapperFunctionCall> DeallocActions;
+    sys::MemoryBlock MB;
+
+    {
+      std::scoped_lock<std::mutex> Lock(M);
+
+      auto SlabI = Slabs.find(Base.toPtr<void *>());
+      if (SlabI == Slabs.end()) {
         Err = joinErrors(
             std::move(Err),
-            make_error<StringError>("No allocation entry found "
-                                    "for " +
-                                        formatv("{0:x}", Base.getValue()),
+            make_error<StringError>("In release, " + formatv("{0:x}", Base) +
+                                        " is not part of any reserved "
+                                        "address range",
                                     inconvertibleErrorCode()));
+        continue;
+      }
+
+      auto &Slab = SlabI->second;
+
+      for (auto &[Addr, Region] : Slab.Regions)
+        llvm::copy(Region.DeallocActions, back_inserter(DeallocActions));
+
+      MB = {Base.toPtr<void *>(), Slab.Size};
+
+      Slabs.erase(SlabI);
     }
-  }
 
-  while (!AllocPairs.empty()) {
-    auto &P = AllocPairs.back();
-    Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second));
-    AllocPairs.pop_back();
+    Err = joinErrors(std::move(Err), runDeallocActions(DeallocActions));
+    if (auto EC = sys::Memory::releaseMappedMemory(MB))
+      Err = joinErrors(std::move(Err), errorCodeToError(EC));
   }
 
   return Err;
@@ -185,16 +187,15 @@ Error SimpleExecutorMemoryManager::deallocate(
 
 Error SimpleExecutorMemoryManager::shutdown() {
 
-  AllocationsMap AM;
+  // TODO: Prevent new allocations during shutdown.
+  std::vector<ExecutorAddr> Bases;
   {
-    std::lock_guard<std::mutex> Lock(M);
-    AM = std::move(Allocations);
+    std::scoped_lock<std::mutex> Lock(M);
+    for (auto &[Base, Slab] : Slabs)
+      Bases.push_back(ExecutorAddr::fromPtr(Base));
   }
 
-  Error Err = Error::success();
-  for (auto &KV : AM)
-    Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second));
-  return Err;
+  return release(Bases);
 }
 
 void SimpleExecutorMemoryManager::addBootstrapSymbols(
@@ -202,58 +203,150 @@ void SimpleExecutorMemoryManager::addBootstrapSymbols(
   M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this);
   M[rt::SimpleExecutorMemoryManagerReserveWrapperName] =
       ExecutorAddr::fromPtr(&reserveWrapper);
-  M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] =
-      ExecutorAddr::fromPtr(&finalizeWrapper);
-  M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] =
-      ExecutorAddr::fromPtr(&deallocateWrapper);
+  M[rt::SimpleExecutorMemoryManagerInitializeWrapperName] =
+      ExecutorAddr::fromPtr(&initializeWrapper);
+  M[rt::SimpleExecutorMemoryManagerDeinitializeWrapperName] =
+      ExecutorAddr::fromPtr(&deinitializeWrapper);
+  M[rt::SimpleExecutorMemoryManagerReleaseWrapperName] =
+      ExecutorAddr::fromPtr(&releaseWrapper);
 }
 
-Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) {
-  Error Err = Error::success();
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddr A, StringRef Context) {
+  auto MakeBadSlabError = [&]() {
+    return make_error<StringError>(
+        Context + ", address " + formatv("{0:x}", A) +
+            " is not part of any reserved address range",
+        inconvertibleErrorCode());
+  };
 
-  while (!A.DeallocationActions.empty()) {
-    Err = joinErrors(std::move(Err),
-                     A.DeallocationActions.back().runWithSPSRetErrorMerged());
-    A.DeallocationActions.pop_back();
+  auto I = Slabs.upper_bound(A.toPtr<void *>());
+  if (I == Slabs.begin())
+    return MakeBadSlabError();
+  --I;
+  if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+           .contains(A))
+    return MakeBadSlabError();
+
+  return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::SlabInfo &>
+SimpleExecutorMemoryManager::getSlabInfo(ExecutorAddrRange R,
+                                         StringRef Context) {
+  auto MakeBadSlabError = [&]() {
+    return make_error<StringError>(
+        Context + ", range " + formatv("{0:x}", R) +
+            " is not part of any reserved address range",
+        inconvertibleErrorCode());
+  };
+
+  auto I = Slabs.upper_bound(R.Start.toPtr<void *>());
+  if (I == Slabs.begin())
+    return MakeBadSlabError();
+  --I;
+  if (!ExecutorAddrRange(ExecutorAddr::fromPtr(I->first), I->second.Size)
+           .contains(R))
+    return MakeBadSlabError();
+
+  return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::createRegionInfo(ExecutorAddrRange R,
+                                              StringRef Context) {
+
+  auto Slab = getSlabInfo(R, Context);
+  if (!Slab)
+    return Slab.takeError();
+
+  auto MakeBadRegionError = [&](ExecutorAddrRange Other, bool Prev) {
+    return make_error<StringError>(Context + ", region " + formatv("{0:x}", R) +
+                                       " overlaps " +
+                                       (Prev ? "previous" : "following") +
+                                       " region " + formatv("{0:x}", Other),
+                                   inconvertibleErrorCode());
+  };
+
+  auto I = Slab->Regions.upper_bound(R.Start);
+  if (I != Slab->Regions.begin()) {
+    auto J = std::prev(I);
+    ExecutorAddrRange PrevRange(J->first, J->second.Size);
+    if (PrevRange.overlaps(R))
+      return MakeBadRegionError(PrevRange, true);
+  }
+  if (I != Slab->Regions.end()) {
+    ExecutorAddrRange NextRange(I->first, I->second.Size);
+    if (NextRange.overlaps(R))
+      return MakeBadRegionError(NextRange, false);
   }
 
-  sys::MemoryBlock MB(Base, A.Size);
-  if (auto EC = sys::Memory::releaseMappedMemory(MB))
-    Err = joinErrors(std::move(Err), errorCodeToError(EC));
+  auto &RInfo = Slab->Regions[R.Start];
+  RInfo.Size = R.size();
+  return RInfo;
+}
 
-  return Err;
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(SlabInfo &Slab, ExecutorAddr A,
+                                           StringRef Context) {
+  auto I = Slab.Regions.find(A);
+  if (I == Slab.Regions.end())
+    return make_error<StringError>(
+        Context + ", address " + formatv("{0:x}", A) +
+            " does not correspond to the start of any initialized region",
+        inconvertibleErrorCode());
+
+  return I->second;
+}
+
+Expected<SimpleExecutorMemoryManager::RegionInfo &>
+SimpleExecutorMemoryManager::getRegionInfo(ExecutorAddr A, StringRef Context) {
+  auto Slab = getSlabInfo(A, Context);
+  if (!Slab)
+    return Slab.takeError();
+
+  return getRegionInfo(*Slab, A, Context);
 }
 
 llvm::orc::shared::CWrapperFunctionResult
 SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData,
                                             size_t ArgSize) {
-  return shared::WrapperFunction<
-             rt::SPSSimpleExecutorMemoryManagerReserveSignature>::
+  return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReserveSignature>::
       handle(ArgData, ArgSize,
              shared::makeMethodWrapperHandler(
-                 &SimpleExecutorMemoryManager::allocate))
+                 &SimpleExecutorMemoryManager::reserve))
+          .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::initializeWrapper(const char *ArgData,
+                                               size_t ArgSize) {
+  return shared::
+      WrapperFunction<rt::SPSSimpleRemoteMemoryMapInitializeSignature>::handle(
+             ArgData, ArgSize,
+             shared::makeMethodWrapperHandler(
+                 &SimpleExecutorMemoryManager::initialize))
           .release();
 }
 
 llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData,
-                                             size_t ArgSize) {
+SimpleExecutorMemoryManager::deinitializeWrapper(const char *ArgData,
+                                                 size_t ArgSize) {
   return shared::WrapperFunction<
-             rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+             rt::SPSSimpleRemoteMemoryMapDeinitializeSignature>::
       handle(ArgData, ArgSize,
              shared::makeMethodWrapperHandler(
-                 &SimpleExecutorMemoryManager::finalize))
+                 &SimpleExecutorMemoryManager::deinitialize))
           .release();
 }
 
 llvm::orc::shared::CWrapperFunctionResult
-SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData,
-                                               size_t ArgSize) {
-  return shared::WrapperFunction<
-             rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+SimpleExecutorMemoryManager::releaseWrapper(const char *ArgData,
+                                            size_t ArgSize) {
+  return shared::WrapperFunction<rt::SPSSimpleRemoteMemoryMapReleaseSignature>::
       handle(ArgData, ArgSize,
              shared::makeMethodWrapperHandler(
-                 &SimpleExecutorMemoryManager::deallocate))
+                 &SimpleExecutorMemoryManager::release))
           .release();
 }
 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 2430d98..3908a78 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2374,16 +2374,21 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
   Out << "!DICompileUnit(";
   MDFieldPrinter Printer(Out, WriterCtx);
 
-  auto Lang = N->getSourceLanguage();
-  if (Lang.hasVersionedName())
+  DISourceLanguageName Lang = N->getSourceLanguage();
+
+  if (Lang.hasVersionedName()) {
     Printer.printDwarfEnum(
         "sourceLanguageName",
         static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
         dwarf::SourceLanguageNameString,
         /* ShouldSkipZero */ false);
-  else
+
+    Printer.printInt("sourceLanguageVersion", Lang.getVersion(),
+                     /*ShouldSkipZero=*/true);
+  } else {
     Printer.printDwarfEnum("language", Lang.getName(), dwarf::LanguageString,
                            /* ShouldSkipZero */ false);
+  }
 
   Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
   Printer.printString("producer", N->getProducer());
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index a755c22..aee3c3b 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -553,7 +553,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
   SFrameSection =
       Ctx->getELFSection(".sframe", ELF::SHT_GNU_SFRAME, ELF::SHF_ALLOC);
 
-  CallGraphSection = Ctx->getELFSection(".callgraph", ELF::SHT_PROGBITS, 0);
+  CallGraphSection =
+      Ctx->getELFSection(".llvm.callgraph", ELF::SHT_PROGBITS, 0);
 
   StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
 
@@ -1171,8 +1172,8 @@ MCObjectFileInfo::getCallGraphSection(const MCSection &TextSec) const {
   }
 
   return Ctx->getELFSection(
-      ".callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName, true,
-      ElfSec.getUniqueID(),
+      ".llvm.callgraph", ELF::SHT_PROGBITS, Flags, 0, GroupName,
+      /*IsComdat=*/true, ElfSec.getUniqueID(),
       static_cast<const MCSymbolELF *>(TextSec.getBeginSymbol()));
 }
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 3c8e44a..0208735 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -302,7 +302,7 @@ void ProfOStream::patch(ArrayRef<PatchItem> P) {
 
 std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage,
                            StringRef FileName,
-                           uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
+                           [[maybe_unused]] uint64_t Version) {
   // Value names may be prefixed with a binary '1' to indicate
   // that the backend should not modify the symbols due to any platform
   // naming convention. Do not include that '1' in the PGO profile name.
diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp
index 82b0e6a..eff9947 100644
--- a/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/llvm/lib/Support/PrettyStackTrace.cpp
@@ -141,7 +141,7 @@ extern "C" const char *__crashreporter_info__
 asm(".desc ___crashreporter_info__, 0x10");
 #endif
 
-static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] static void setCrashLogMessage(const char *msg);
 static void setCrashLogMessage(const char *msg) {
 #ifdef HAVE_CRASHREPORTERCLIENT_H
   (void)CRSetCrashLogMessage(msg);
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index a43cf37a..f2bbaab 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SMLoc.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -38,6 +39,22 @@ using namespace llvm;
 
 static const size_t TabStop = 8;
 
+// Out of line to avoid needing definition of vfs::FileSystem in header.
+SourceMgr::SourceMgr() = default;
+SourceMgr::SourceMgr(IntrusiveRefCntPtr<vfs::FileSystem> FS)
+    : FS(std::move(FS)) {}
+SourceMgr::SourceMgr(SourceMgr &&) = default;
+SourceMgr &SourceMgr::operator=(SourceMgr &&) = default;
+SourceMgr::~SourceMgr() = default;
+
+IntrusiveRefCntPtr<vfs::FileSystem> SourceMgr::getVirtualFileSystem() const {
+  return FS;
+}
+
+void SourceMgr::setVirtualFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+  this->FS = std::move(FS);
+}
+
 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
                                    SMLoc IncludeLoc,
                                    std::string &IncludedFile) {
@@ -52,8 +69,11 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
 ErrorOr<std::unique_ptr<MemoryBuffer>>
 SourceMgr::OpenIncludeFile(const std::string &Filename,
                            std::string &IncludedFile) {
+  if (!FS)
+    reportFatalInternalError("Opening include file from SourceMgr without VFS");
+
   ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
-      MemoryBuffer::getFile(Filename);
+      FS->getBufferForFile(Filename);
 
   SmallString<64> Buffer(Filename);
   // If the file didn't exist directly, see if it's in an include path.
@@ -61,7 +81,7 @@ SourceMgr::OpenIncludeFile(const std::string &Filename,
        ++i) {
     Buffer = IncludeDirectories[i];
     sys::path::append(Buffer, Filename);
-    NewBufOrErr = MemoryBuffer::getFile(Buffer);
+    NewBufOrErr = FS->getBufferForFile(Buffer);
   }
 
   if (NewBufOrErr)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 42043f7..b1024a8 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -128,6 +129,7 @@ int llvm::TableGenMain(const char *argv0,
   // Record the location of the include directory so that the lexer can find
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
+  SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem());
 
   TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
 
diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp
index 2c3726a..db45054 100644
--- a/llvm/lib/TableGen/Parser.cpp
+++ b/llvm/lib/TableGen/Parser.cpp
@@ -9,6 +9,7 @@
 #include "llvm/TableGen/Parser.h"
 #include "TGParser.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TableGen/Record.h"
 
 using namespace llvm;
@@ -21,6 +22,7 @@ bool llvm::TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records) {
   SrcMgr = SourceMgr();
   SrcMgr.takeSourceBuffersFrom(InputSrcMgr);
   SrcMgr.setIncludeDirs(InputSrcMgr.getIncludeDirs());
+  SrcMgr.setVirtualFileSystem(InputSrcMgr.getVirtualFileSystem());
   SrcMgr.setDiagHandler(InputSrcMgr.getDiagHandler(),
                         InputSrcMgr.getDiagContext());
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9926a4d..be2f2e4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16254,7 +16254,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
       SplatVal > 1) {
     SDValue Pg = getPredicateForScalableVector(DAG, DL, VT);
     SDValue Res =
-        DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
+        DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
                     DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32));
     if (Negated)
       Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
@@ -22942,7 +22942,7 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2));
   case Intrinsic::aarch64_sve_asrd:
-    return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
+    return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_cmphs:
     if (!N->getOperand(2).getValueType().isFloatingPoint())
@@ -30047,7 +30047,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
 
     SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
     SDValue Res =
-        DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
+        DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
     if (Negated)
       Res = DAG.getNode(ISD::SUB, DL, ContainerVT,
                         DAG.getConstant(0, DL, ContainerVT), Res);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7322212..fe84193 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_USDOT : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+  let hasSideEffects = 0;
+}
+
 // Generic instruction for the BSP pseudo. It is expanded into BSP, which
 // expands into BSL/BIT/BIF after register allocation.
 def G_BSP : AArch64GenericInstruction {
@@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;
 
 def : GINodeEquiv<G_UDOT, AArch64udot>;
 def : GINodeEquiv<G_SDOT, AArch64sdot>;
+def : GINodeEquiv<G_USDOT, AArch64usdot>;
 
 def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index b3c9656..343fd81 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -40,7 +40,11 @@ yaml::AArch64FunctionInfo::AArch64FunctionInfo(
           getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
       HasStackFrame(MFI.hasStackFrame()
                         ? std::optional<bool>(MFI.hasStackFrame())
-                        : std::nullopt) {}
+                        : std::nullopt),
+      HasStreamingModeChanges(
+          MFI.hasStreamingModeChanges()
+              ? std::optional<bool>(MFI.hasStreamingModeChanges())
+              : std::nullopt) {}
 
 void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
   MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this);
@@ -55,6 +59,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
                     YamlMFI.StackSizePPR.value_or(0));
   if (YamlMFI.HasStackFrame)
     setHasStackFrame(*YamlMFI.HasStackFrame);
+  if (YamlMFI.HasStreamingModeChanges)
+    setHasStreamingModeChanges(*YamlMFI.HasStreamingModeChanges);
 }
 
 static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index bd0a17d..d1832f4 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -645,6 +645,7 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
   std::optional<uint64_t> StackSizeZPR;
   std::optional<uint64_t> StackSizePPR;
   std::optional<bool> HasStackFrame;
+  std::optional<bool> HasStreamingModeChanges;
 
   AArch64FunctionInfo() = default;
   AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
@@ -659,6 +660,7 @@ template <> struct MappingTraits<AArch64FunctionInfo> {
     YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
     YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
     YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
+    YamlIO.mapOptional("hasStreamingModeChanges", MFI.HasStreamingModeChanges);
   }
 };
 
diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
index cdf2822..a90950d 100644
--- a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
@@ -75,6 +75,10 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
         if (Src != Dst)
           MRI->replaceRegWith(Dst, Src);
 
+        if (MI.getOperand(1).isUndef())
+          for (MachineOperand &MO : MRI->use_operands(Dst))
+            MO.setIsUndef();
+
         // MI must be erased from the basic block before recalculating the live
         // interval.
         LIS->RemoveMachineInstrFromMaps(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index bc6b931..98a128e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -265,7 +265,7 @@ def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
   SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
 ]>;
 
-def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;
+def AArch64asrd_m1 : SDNode<"AArch64ISD::ASRD_MERGE_OP1", SDT_AArch64Arith_Imm>;
 def AArch64urshri_p_node : SDNode<"AArch64ISD::URSHR_I_PRED", SDT_AArch64Arith_Imm>;
 
 def AArch64urshri_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9e2d698..05a4313 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     return LowerTriOp(AArch64::G_UDOT);
   case Intrinsic::aarch64_neon_sdot:
     return LowerTriOp(AArch64::G_SDOT);
+  case Intrinsic::aarch64_neon_usdot:
+    return LowerTriOp(AArch64::G_USDOT);
   case Intrinsic::aarch64_neon_sqxtn:
     return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
   case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 4749748..434ea67 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass {
                                     MachineBasicBlock::iterator MBBI,
                                     LiveRegs PhysLiveRegs);
 
+  /// Attempts to find an insertion point before \p Inst where the status flags
+  /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
+  /// the block is found.
+  std::pair<MachineBasicBlock::iterator, LiveRegs>
+  findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
+                                SmallVectorImpl<InstInfo>::const_iterator Inst);
   void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI, ZAState From,
                        ZAState To, LiveRegs PhysLiveRegs);
@@ -337,6 +343,28 @@ private:
   MachineRegisterInfo *MRI = nullptr;
 };
 
+static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
+  LiveRegs PhysLiveRegs = LiveRegs::None;
+  if (!LiveUnits.available(AArch64::NZCV))
+    PhysLiveRegs |= LiveRegs::NZCV;
+  // We have to track W0 and X0 separately as otherwise things can get
+  // confused if we attempt to preserve X0 but only W0 was defined.
+  if (!LiveUnits.available(AArch64::W0))
+    PhysLiveRegs |= LiveRegs::W0;
+  if (!LiveUnits.available(AArch64::W0_HI))
+    PhysLiveRegs |= LiveRegs::W0_HI;
+  return PhysLiveRegs;
+}
+
+static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
+  if (PhysLiveRegs & LiveRegs::NZCV)
+    LiveUnits.addReg(AArch64::NZCV);
+  if (PhysLiveRegs & LiveRegs::W0)
+    LiveUnits.addReg(AArch64::W0);
+  if (PhysLiveRegs & LiveRegs::W0_HI)
+    LiveUnits.addReg(AArch64::W0_HI);
+}
+
 FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
   assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
           SMEFnAttrs.hasZAState()) &&
@@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
     LiveRegUnits LiveUnits(*TRI);
     LiveUnits.addLiveOuts(MBB);
 
-    auto GetPhysLiveRegs = [&] {
-      LiveRegs PhysLiveRegs = LiveRegs::None;
-      if (!LiveUnits.available(AArch64::NZCV))
-        PhysLiveRegs |= LiveRegs::NZCV;
-      // We have to track W0 and X0 separately as otherwise things can get
-      // confused if we attempt to preserve X0 but only W0 was defined.
-      if (!LiveUnits.available(AArch64::W0))
-        PhysLiveRegs |= LiveRegs::W0;
-      if (!LiveUnits.available(AArch64::W0_HI))
-        PhysLiveRegs |= LiveRegs::W0_HI;
-      return PhysLiveRegs;
-    };
-
-    Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
+    Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
     auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
     auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
     for (MachineInstr &MI : reverse(MBB)) {
       MachineBasicBlock::iterator MBBI(MI);
       LiveUnits.stepBackward(MI);
-      LiveRegs PhysLiveRegs = GetPhysLiveRegs();
+      LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
       // The SMEStateAllocPseudo marker is added to a function if the save
       // buffer was allocated in SelectionDAG. It marks the end of the
       // allocation -- which is a safe point for this pass to insert any TPIDR2
@@ -476,6 +491,49 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
   return BundleStates;
 }
 
+std::pair<MachineBasicBlock::iterator, LiveRegs>
+MachineSMEABI::findStateChangeInsertionPoint(
+    MachineBasicBlock &MBB, const BlockInfo &Block,
+    SmallVectorImpl<InstInfo>::const_iterator Inst) {
+  LiveRegs PhysLiveRegs;
+  MachineBasicBlock::iterator InsertPt;
+  if (Inst != Block.Insts.end()) {
+    InsertPt = Inst->InsertPt;
+    PhysLiveRegs = Inst->PhysLiveRegs;
+  } else {
+    InsertPt = MBB.getFirstTerminator();
+    PhysLiveRegs = Block.PhysLiveRegsAtExit;
+  }
+
+  if (!(PhysLiveRegs & LiveRegs::NZCV))
+    return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).
+
+  // Find the previous state change. We can not move before this point.
+  MachineBasicBlock::iterator PrevStateChangeI;
+  if (Inst == Block.Insts.begin()) {
+    PrevStateChangeI = MBB.begin();
+  } else {
+    // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
+    // InstInfo object for instructions that require a specific ZA state, so the
+    // InstInfo is the site of the previous state change in the block (which can
+    // be several MIs earlier).
+    PrevStateChangeI = std::prev(Inst)->InsertPt;
+  }
+
+  // Note: LiveUnits will only accurately track X0 and NZCV.
+  LiveRegUnits LiveUnits(*TRI);
+  setPhysLiveRegs(LiveUnits, PhysLiveRegs);
+  for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
+    // Don't move before/into a call (which may have a state change before it).
+    if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
+      break;
+    LiveUnits.stepBackward(*I);
+    if (LiveUnits.available(AArch64::NZCV))
+      return {I, getPhysLiveRegs(LiveUnits)};
+  }
+  return {InsertPt, PhysLiveRegs};
+}
+
 void MachineSMEABI::insertStateChanges(EmitContext &Context,
                                        const FunctionInfo &FnInfo,
                                        const EdgeBundles &Bundles,
@@ -490,10 +548,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
       CurrentState = InState;
 
     for (auto &Inst : Block.Insts) {
-      if (CurrentState != Inst.NeededState)
-        emitStateChange(Context, MBB, Inst.InsertPt, CurrentState,
-                        Inst.NeededState, Inst.PhysLiveRegs);
-      CurrentState = Inst.NeededState;
+      if (CurrentState != Inst.NeededState) {
+        auto [InsertPt, PhysLiveRegs] =
+            findStateChangeInsertionPoint(MBB, Block, &Inst);
+        emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
+                        PhysLiveRegs);
+        CurrentState = Inst.NeededState;
+      }
     }
 
     if (MBB.succ_empty())
@@ -501,9 +562,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
 
     ZAState OutState =
         BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
-    if (CurrentState != OutState)
-      emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState,
-                      OutState, Block.PhysLiveRegsAtExit);
+    if (CurrentState != OutState) {
+      auto [InsertPt, PhysLiveRegs] =
+          findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
+      emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
+                      PhysLiveRegs);
+    }
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index c684f9e..01a40c1 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -654,7 +654,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
     if (TotalCyclesBetweenCandidates < NumMFMACycles - 1)
       InstrsToUnpack.insert(&Instr);
   }
-  return;
 }
 
 void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
@@ -681,7 +680,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
   HiDstOp.setIsRenamable(DstOp.isRenamable());
 
   I.eraseFromParent();
-  return;
 }
 
 MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9945ecc..0d7b6d1 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -161,8 +161,8 @@ namespace {
     friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
       return TE.PseudoOpc < PseudoOpc;
     }
-    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
-                                                const NEONLdStTableEntry &TE) {
+    [[maybe_unused]] friend bool operator<(unsigned PseudoOpc,
+                                           const NEONLdStTableEntry &TE) {
       return PseudoOpc < TE.PseudoOpc;
     }
   };
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 228114c..44c4830 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType;
 def ResPropsTy : DXILOpParamType;
 def SplitDoubleTy : DXILOpParamType;
 def BinaryWithCarryTy : DXILOpParamType;
+def DimensionsTy : DXILOpParamType;
 
 class DXILOpClass;
 
@@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
   let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
+def GetDimensions : DXILOp<72, getDimensions> {
+  let Doc = "gets the dimensions of a buffer or texture";
+  let arguments = [HandleTy, Int32Ty];
+  let result = DimensionsTy;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def Barrier : DXILOp<80, barrier> {
   let Doc = "inserts a memory barrier in the shader";
   let intrinsics = [
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 1aed8f9..944b2e6 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) {
   return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
 }
 
+static StructType *getDimensionsType(LLVMContext &Ctx) {
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  return getOrCreateStructType("dx.types.Dimensions",
+                               {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx);
+}
+
 static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
                                     Type *OverloadTy) {
   switch (Kind) {
@@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
     return getSplitDoubleType(Ctx);
   case OpParamType::BinaryWithCarryTy:
     return getBinaryWithCarryType(Ctx);
+  case OpParamType::DimensionsTy:
+    return getDimensionsType(Ctx);
   }
   llvm_unreachable("Invalid parameter kind");
   return nullptr;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 610d8b6..e46a393 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -627,6 +627,28 @@ public:
     });
   }
 
+  [[nodiscard]] bool lowerGetDimensionsX(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *Int32Ty = IRB.getInt32Ty();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      Value *Handle =
+          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+      Value *Undef = UndefValue::get(Int32Ty);
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty);
+      if (Error E = OpCall.takeError())
+        return E;
+      Value *Dim = IRB.CreateExtractValue(*OpCall, 0);
+
+      CI->replaceAllUsesWith(Dim);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
   [[nodiscard]] bool lowerGetPointer(Function &F) {
     // These should have already been handled in DXILResourceAccess, so we can
     // just clean up the dead prototype.
@@ -934,6 +956,9 @@ public:
       case Intrinsic::dx_resource_updatecounter:
         HasErrors |= lowerUpdateCounter(F);
         break;
+      case Intrinsic::dx_resource_getdimensions_x:
+        HasErrors |= lowerGetDimensionsX(F);
+        break;
       case Intrinsic::ctpop:
         HasErrors |= lowerCtpopToCountBits(F);
         break;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 82c43ff..26a8728 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
 /// Returns the bit offset to backpatch with the location of the real VST.
 void DXILBitcodeWriter::writeModuleInfo() {
   // Emit various pieces of data attached to a module.
-  if (!M.getTargetTriple().empty())
-    writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE,
-                      M.getTargetTriple().str(), 0 /*TODO*/);
-  const std::string &DL = M.getDataLayoutStr();
-  if (!DL.empty())
-    writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
+  // We need to hardcode a triple and datalayout that's compatible with the
+  // historical DXIL triple and datalayout from DXC.
+  StringRef Triple = "dxil-ms-dx";
+  StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
+                 "f16:32-f32:32-f64:64-n8:16:32:64";
+  writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
+  writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
   if (!M.getModuleInlineAsm().empty())
     writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
                       0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
index 1eb03bf..725f2b1 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
@@ -149,11 +149,6 @@ public:
     std::string Data;
     llvm::raw_string_ostream OS(Data);
 
-    Triple OriginalTriple = M.getTargetTriple();
-    // Set to DXIL triple when write to bitcode.
-    // Only the output bitcode need to be DXIL triple.
-    M.setTargetTriple(Triple("dxil-ms-dx"));
-
     // Perform late legalization of lifetime intrinsics that would otherwise
     // fail the Module Verifier if performed in an earlier pass
     legalizeLifetimeIntrinsics(M);
@@ -165,9 +160,6 @@ public:
     // not-so-legal legalizations
     removeLifetimeIntrinsics(M);
 
-    // Recover triple.
-    M.setTargetTriple(OriginalTriple);
-
     Constant *ModuleConstant =
         ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data));
     auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true,
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5f180d6..3bd6ed4 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -66,6 +66,10 @@ public:
 
   void remapInstruction(MCInst &Instr) const;
 
+  Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
+                               ArrayRef<uint8_t> Bytes,
+                               uint64_t Address) const override;
+
 private:
   bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
                   uint64_t &BytesToSkip, raw_ostream &CS) const;
@@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
   return Result;
 }
 
+Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
+                                                  uint64_t &Size,
+                                                  ArrayRef<uint8_t> Bytes,
+                                                  uint64_t Address) const {
+  // At the start of a symbol, force a fresh packet by resetting any
+  // in-progress bundle state. This prevents packets from straddling label
+  // boundaries when data (e.g. jump tables) appears in between.
+  Size = 0;
+  resetBundle();
+  return true;
+}
+
 static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
                                         ArrayRef<MCPhysReg> Table) {
   if (RegNo < Table.size()) {
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 52e6b0b..68f5312 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -174,8 +174,8 @@ namespace {
     const TargetRegisterInfo *TRI;
   };
 
-  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
-    LLVM_ATTRIBUTE_UNUSED;
+  [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+                                           const PrintRegSet &P);
   raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
     OS << '{';
     for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 14b6bb3..9087f9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -272,15 +272,14 @@ namespace {
       OS << *I << ' ' << **I << '\n';
   }
 
-  raw_ostream &operator<< (raw_ostream &OS,
-                           const NodeVect &S) LLVM_ATTRIBUTE_UNUSED;
+  [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S);
   raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) {
     dump_node_container(OS, S);
     return OS;
   }
 
-  raw_ostream &operator<< (raw_ostream &OS,
-                           const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
+  [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+                                           const NodeToUsesMap &M);
   raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
     for (const auto &I : M) {
       const UseSet &Us = I.second;
@@ -914,9 +913,8 @@ namespace {
     const NodeToValueMap &Map;
   };
 
-  raw_ostream &operator<< (raw_ostream &OS,
-                           const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
-  raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
+  [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+                                           const LocationAsBlock &Loc) {
     for (const auto &I : Loc.Map) {
       OS << I.first << " -> ";
       if (BasicBlock *B = cast_or_null<BasicBlock>(I.second))
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 14a7ae7..3900aac 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -132,8 +132,7 @@ namespace {
     const TargetRegisterInfo &TRI;
     friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
   };
-  raw_ostream &operator<<(raw_ostream &OS,
-                          const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+  [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P);
   raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
     OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
        << ", PredR:" << printReg(P.FP.PredR, &P.TRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index f9fdab4..9c81e96 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -51,11 +51,11 @@ private:
   const TargetRegisterInfo &TRI;
 };
 
-  raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
-    LLVM_ATTRIBUTE_UNUSED;
-  raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
-    return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
-  }
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+                                         const PrintRegister &PR);
+raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+  return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
+}
 
   class HexagonGenPredicate : public MachineFunctionPass {
   public:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index bfea50e..6b48a21 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) {
   return new HexagonTargetStreamer(S);
 }
 
-static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) {
   if (STI->hasFeature(F))
     STI->ToggleFeature(F);
 }
 
-static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) {
   return STI->hasFeature(F);
 }
 
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 8851a0f..e857b2d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
 
 bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) {
   return StringSwitch<bool>(Format)
-      .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true)
-      .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj",
+      .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true)
+      .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"},
              STI.hasFeature(RISCV::FeatureStdExtZca))
-      .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es",
+      .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"},
              !STI.hasFeature(RISCV::Feature64Bit))
       .Default(false);
 }
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 662d3f6..b1794b7 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -717,6 +717,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
       .clampScalar(0, sXLen, sXLen)
       .lower();
 
+  LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) {
+    LLT VecTy = Query.Types[0];
+    LLT EltTy = Query.Types[1];
+    return VecTy.getElementType() == EltTy;
+  };
+
+  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+      .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+                   InsertVectorEltPred, typeIs(2, sXLen)))
+      .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred,
+                   typeIs(2, sXLen)));
+
   getLegacyLegalizerInfo().computeTables();
   verify(*ST.getInstrInfo());
 }
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 50730c6..ab93bba 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL";
 bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) {
   // Return true if not one of the valid LMUL strings
   return StringSwitch<bool>(Data)
-      .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true)
+      .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true)
       .Default(false);
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f863392a..637d61fe 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
 // and floating point computation.
 // The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can
 // be one or two VA (Vector Arithmetic).
-multiclass SiFive7ProcResources<bit extraVALU = false> {
+multiclass SiFive7ProcResources<bit dualVALU = false> {
   let BufferSize = 0 in {
     def PipeA     : ProcResource<1>;
     def PipeB     : ProcResource<1>;
@@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
     def FDiv      : ProcResource<1>; // FP Division/Sqrt
 
     // Arithmetic sequencer(s)
-    if extraVALU then {
+    if dualVALU then {
       // VA1 can handle any vector airthmetic instruction.
       def VA1     : ProcResource<1>;
       // VA2 generally can only handle simple vector arithmetic.
@@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
   def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"),
                              !cast<ProcResource>(NAME#"PipeB")]>;
 
-  if extraVALU then
+  if dualVALU then
   def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"),
                                !cast<ProcResource>(NAME#"VA2")]>;
 }
@@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance {
 /// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and
 /// (2) WriteRes entries. It's parameterized by config values that will
 /// eventually be supplied by different SchedMachineModels.
-multiclass SiFive7SchedResources<int vlen, bit extraVALU,
+multiclass SiFive7SchedResources<int vlen, bit dualVALU,
                                  SiFive7FPLatencies fpLatencies,
                                  bit hasFastGather> {
-  defm SiFive7 : SiFive7ProcResources<extraVALU>;
+  defm SiFive7 : SiFive7ProcResources<dualVALU>;
 
   // Pull out defs from SiFive7ProcResources so we can refer to them by name.
   defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA);
@@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
   defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv);
   defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv);
   // Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU.
-  defvar SiFive7VA1 = !if (extraVALU,
+  defvar SiFive7VA1 = !if (dualVALU,
                             !cast<ProcResource>(NAME # SiFive7VA1),
                             !cast<ProcResource>(NAME # SiFive7VA));
-  defvar SiFive7VA1OrVA2 = !if (extraVALU,
+  defvar SiFive7VA1OrVA2 = !if (dualVALU,
                             !cast<ProcResGroup>(NAME # SiFive7VA1OrVA2),
                             !cast<ProcResource>(NAME # SiFive7VA));
   defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA);
@@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
                              HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
                              HasStdExtZkr];
   int VLEN = vlen;
-  bit HasExtraVALU = false;
+  bit HasDualVALU = false;
 
   SiFive7FPLatencies FPLatencies;
   bit HasFastGather = false;
@@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
 }
 
 def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
-  let HasExtraVALU = true;
+  let HasDualVALU = true;
   let FPLatencies = SiFive7LowFPLatencies;
   let HasFastGather = true;
 }
@@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
 /// Binding models to their scheduling resources.
 foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
   let SchedModel = model in
-  defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
+  defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU,
                                           model.FPLatencies,
                                           model.HasFastGather>;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index a466ab2..a0cff4d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform(
                       SPIRV::Decoration::NonUniformEXT, {});
     }
   }
-  return;
 }
 
 bool SPIRVInstructionSelector::extractSubvector(
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 2934c88..fa08d44 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   }
 }
 
-static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
-{
+[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) {
 
   for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
     if (MRI->isPhysRegUsed(reg))
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
index d9c8e22..6e99fc3 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
@@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
       .Case("i64", wasm::ValType::I64)
       .Case("f32", wasm::ValType::F32)
       .Case("f64", wasm::ValType::F64)
-      .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2",
+      .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"},
              wasm::ValType::V128)
       .Case("funcref", wasm::ValType::FUNCREF)
       .Case("externref", wasm::ValType::EXTERNREF)
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a8908d4..ac251fd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // xacquire <insn>      ; xacquire must be accompanied by 'lock'
   bool IsPrefix =
       StringSwitch<bool>(Name)
-          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
-          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
-          .Cases("xacquire", "xrelease", true)
-          .Cases("acquire", "release", isParsingIntelSyntax())
+          .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true)
+          .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true)
+          .Cases({"xacquire", "xrelease"}, true)
+          .Cases({"acquire", "release"}, isParsingIntelSyntax())
           .Default(false);
 
   auto isLockRepeatNtPrefix = [](StringRef N) {
     return StringSwitch<bool>(N)
-        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
+        .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"},
+               true)
         .Default(false);
   };
 
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 2bf016a..6db780f 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1338,7 +1338,6 @@ def ProcessorFeatures {
   list<SubtargetFeature> PTLFeatures =
     !listremove(ARLSFeatures, [FeatureWIDEKL]);
 
-
   // Clearwaterforest
   list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
                                                   FeatureAVXVNNIINT16,
@@ -1880,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel,
 }
 def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
                 ProcessorFeatures.ADLTuning>;
-def : ProcModel<"pantherlake", AlderlakePModel,
+foreach P = ["pantherlake", "wildcatlake"] in {
+def : ProcModel<P, AlderlakePModel,
                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
+}
 def : ProcModel<"clearwaterforest", AlderlakePModel,
                 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
 def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e0991aa..9f88fda 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -602,8 +602,7 @@ namespace {
     friend bool operator<(const TableEntry &TE, unsigned V) {
       return TE.from < V;
     }
-    friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
-                                                const TableEntry &TE) {
+    [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
       return V < TE.from;
     }
   };
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c32b1a6..a0b64ff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58342,11 +58342,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
   } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
     SDValue Src = Op1;
     SDValue Op10 = Op1.getOperand(0);
-    if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) {
-      // res, flags2 = sub 0, (and (xor X, -1), Y)
+    if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) &&
+        llvm::isOneConstant(Op1.getOperand(1))) {
+      // res, flags2 = sub 0, (and (xor X, -1), 1)
       // cload/cstore ..., cond_ne, flag2
       // ->
-      // res, flags2 = sub 0, (and X, Y)
+      // res, flags2 = sub 0, (and X, 1)
       // cload/cstore ..., cond_e, flag2
       Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0),
                         Op1.getOperand(1));
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 0fd44b7..ec31675 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
           (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
           (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
-          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+
+// If the globaladdr is an absolute_symbol, don't bother using the sign extending
+// instruction since there's no benefit to using it with absolute symbols.
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{
+  auto *GA = cast<GlobalAddressSDNode>(N);
+  return !GA->getGlobal()->getAbsoluteSymbolRange();
+}]>;
+def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)),
+          (MOV64ri32 tglobaladdr:$dst)>,
+      Requires<[KernelCode]>;
+
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper mcsym:$dst)),
diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index 89d5e0d..f6cea85 100644
--- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -22,13 +22,13 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
       .Case("v5e", "v5te")
       .Case("v6j", "v6")
       .Case("v6hl", "v6k")
-      .Cases("v6m", "v6sm", "v6s-m", "v6-m")
-      .Cases("v6z", "v6zk", "v6kz")
-      .Cases("v7", "v7a", "v7hl", "v7l", "v7-a")
+      .Cases({"v6m", "v6sm", "v6s-m"}, "v6-m")
+      .Cases({"v6z", "v6zk"}, "v6kz")
+      .Cases({"v7", "v7a", "v7hl", "v7l"}, "v7-a")
       .Case("v7r", "v7-r")
       .Case("v7m", "v7-m")
       .Case("v7em", "v7e-m")
-      .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a")
+      .Cases({"v8", "v8a", "v8l", "aarch64", "arm64"}, "v8-a")
       .Case("v8.1a", "v8.1-a")
       .Case("v8.2a", "v8.2-a")
       .Case("v8.3a", "v8.3-a")
@@ -39,7 +39,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
       .Case("v8.8a", "v8.8-a")
       .Case("v8.9a", "v8.9-a")
       .Case("v8r", "v8-r")
-      .Cases("v9", "v9a", "v9-a")
+      .Cases({"v9", "v9a"}, "v9-a")
       .Case("v9.1a", "v9.1-a")
       .Case("v9.2a", "v9.2-a")
       .Case("v9.3a", "v9.3-a")
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index a5bdc9d..3479106 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -70,8 +70,8 @@
 
 using namespace llvm;
 
-static std::unique_ptr<llvm::MemoryBuffer>
-    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
+[[maybe_unused]] static std::unique_ptr<llvm::MemoryBuffer>
+getProcCpuinfoContent() {
   const char *CPUInfoFile = "/proc/cpuinfo";
   if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO"))
     CPUInfoFile = CpuinfoIntercept;
@@ -964,6 +964,13 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
       *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
       break;
 
+    // Wildcatlake:
+    case 0xd5:
+      CPU = "wildcatlake";
+      *Type = X86::INTEL_COREI7;
+      *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
+      break;
+
     // Graniterapids:
     case 0xad:
       CPU = "graniterapids";
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index 950bb2b..d765d9c 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -548,8 +548,11 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
   case Triple::csky:
     return computeCSKYDataLayout(*this);
   case Triple::dxil:
+    // TODO: We need to align vectors on the element size generally, but for now
+    // we hard code this for 3-element 32- and 64-bit vectors as a workaround.
+    // See https://github.com/llvm/llvm-project/issues/123968
     return "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
-           "f32:32-f64:64-n8:16:32:64";
+           "f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64";
   case Triple::hexagon:
     return "e-m:e-p:32:32:32-a:0-n16:32-"
            "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index f021094..1068ce4 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -579,87 +579,89 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
 }
 
 static Triple::ArchType parseArch(StringRef ArchName) {
-  auto AT = StringSwitch<Triple::ArchType>(ArchName)
-                .Cases("i386", "i486", "i586", "i686", Triple::x86)
-                // FIXME: Do we need to support these?
-                .Cases("i786", "i886", "i986", Triple::x86)
-                .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
-                .Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc)
-                .Cases("powerpcle", "ppcle", "ppc32le", Triple::ppcle)
-                .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64)
-                .Cases("powerpc64le", "ppc64le", Triple::ppc64le)
-                .Case("xscale", Triple::arm)
-                .Case("xscaleeb", Triple::armeb)
-                .Case("aarch64", Triple::aarch64)
-                .Case("aarch64_be", Triple::aarch64_be)
-                .Case("aarch64_32", Triple::aarch64_32)
-                .Case("arc", Triple::arc)
-                .Case("arm64", Triple::aarch64)
-                .Case("arm64_32", Triple::aarch64_32)
-                .Case("arm64e", Triple::aarch64)
-                .Case("arm64ec", Triple::aarch64)
-                .Case("arm", Triple::arm)
-                .Case("armeb", Triple::armeb)
-                .Case("thumb", Triple::thumb)
-                .Case("thumbeb", Triple::thumbeb)
-                .Case("avr", Triple::avr)
-                .Case("m68k", Triple::m68k)
-                .Case("msp430", Triple::msp430)
-                .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6",
-                       "mipsr6", Triple::mips)
-                .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el",
-                       Triple::mipsel)
-                .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6",
-                       "mips64r6", "mipsn32r6", Triple::mips64)
-                .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
-                       "mipsn32r6el", Triple::mips64el)
-                .Case("r600", Triple::r600)
-                .Case("amdgcn", Triple::amdgcn)
-                .Case("riscv32", Triple::riscv32)
-                .Case("riscv64", Triple::riscv64)
-                .Case("riscv32be", Triple::riscv32be)
-                .Case("riscv64be", Triple::riscv64be)
-                .Case("hexagon", Triple::hexagon)
-                .Cases("s390x", "systemz", Triple::systemz)
-                .Case("sparc", Triple::sparc)
-                .Case("sparcel", Triple::sparcel)
-                .Cases("sparcv9", "sparc64", Triple::sparcv9)
-                .Case("tce", Triple::tce)
-                .Case("tcele", Triple::tcele)
-                .Case("xcore", Triple::xcore)
-                .Case("nvptx", Triple::nvptx)
-                .Case("nvptx64", Triple::nvptx64)
-                .Case("amdil", Triple::amdil)
-                .Case("amdil64", Triple::amdil64)
-                .Case("hsail", Triple::hsail)
-                .Case("hsail64", Triple::hsail64)
-                .Case("spir", Triple::spir)
-                .Case("spir64", Triple::spir64)
-                .Cases("spirv", "spirv1.5", "spirv1.6", Triple::spirv)
-                .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
-                       "spirv32v1.3", "spirv32v1.4", "spirv32v1.5",
-                       "spirv32v1.6", Triple::spirv32)
-                .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
-                       "spirv64v1.3", "spirv64v1.4", "spirv64v1.5",
-                       "spirv64v1.6", Triple::spirv64)
-                .StartsWith("kalimba", Triple::kalimba)
-                .Case("lanai", Triple::lanai)
-                .Case("renderscript32", Triple::renderscript32)
-                .Case("renderscript64", Triple::renderscript64)
-                .Case("shave", Triple::shave)
-                .Case("ve", Triple::ve)
-                .Case("wasm32", Triple::wasm32)
-                .Case("wasm64", Triple::wasm64)
-                .Case("csky", Triple::csky)
-                .Case("loongarch32", Triple::loongarch32)
-                .Case("loongarch64", Triple::loongarch64)
-                .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
-                       "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
-                       "dxilv1.8", Triple::dxil)
-                // Note: Cases has max limit of 10.
-                .Case("dxilv1.9", Triple::dxil)
-                .Case("xtensa", Triple::xtensa)
-                .Default(Triple::UnknownArch);
+  auto AT =
+      StringSwitch<Triple::ArchType>(ArchName)
+          .Cases({"i386", "i486", "i586", "i686"}, Triple::x86)
+          // FIXME: Do we need to support these?
+          .Cases({"i786", "i886", "i986"}, Triple::x86)
+          .Cases({"amd64", "x86_64", "x86_64h"}, Triple::x86_64)
+          .Cases({"powerpc", "powerpcspe", "ppc", "ppc32"}, Triple::ppc)
+          .Cases({"powerpcle", "ppcle", "ppc32le"}, Triple::ppcle)
+          .Cases({"powerpc64", "ppu", "ppc64"}, Triple::ppc64)
+          .Cases({"powerpc64le", "ppc64le"}, Triple::ppc64le)
+          .Case("xscale", Triple::arm)
+          .Case("xscaleeb", Triple::armeb)
+          .Case("aarch64", Triple::aarch64)
+          .Case("aarch64_be", Triple::aarch64_be)
+          .Case("aarch64_32", Triple::aarch64_32)
+          .Case("arc", Triple::arc)
+          .Case("arm64", Triple::aarch64)
+          .Case("arm64_32", Triple::aarch64_32)
+          .Case("arm64e", Triple::aarch64)
+          .Case("arm64ec", Triple::aarch64)
+          .Case("arm", Triple::arm)
+          .Case("armeb", Triple::armeb)
+          .Case("thumb", Triple::thumb)
+          .Case("thumbeb", Triple::thumbeb)
+          .Case("avr", Triple::avr)
+          .Case("m68k", Triple::m68k)
+          .Case("msp430", Triple::msp430)
+          .Cases({"mips", "mipseb", "mipsallegrex", "mipsisa32r6", "mipsr6"},
+                 Triple::mips)
+          .Cases({"mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el"},
+                 Triple::mipsel)
+          .Cases({"mips64", "mips64eb", "mipsn32", "mipsisa64r6", "mips64r6",
+                  "mipsn32r6"},
+                 Triple::mips64)
+          .Cases({"mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el",
+                  "mipsn32r6el"},
+                 Triple::mips64el)
+          .Case("r600", Triple::r600)
+          .Case("amdgcn", Triple::amdgcn)
+          .Case("riscv32", Triple::riscv32)
+          .Case("riscv64", Triple::riscv64)
+          .Case("riscv32be", Triple::riscv32be)
+          .Case("riscv64be", Triple::riscv64be)
+          .Case("hexagon", Triple::hexagon)
+          .Cases({"s390x", "systemz"}, Triple::systemz)
+          .Case("sparc", Triple::sparc)
+          .Case("sparcel", Triple::sparcel)
+          .Cases({"sparcv9", "sparc64"}, Triple::sparcv9)
+          .Case("tce", Triple::tce)
+          .Case("tcele", Triple::tcele)
+          .Case("xcore", Triple::xcore)
+          .Case("nvptx", Triple::nvptx)
+          .Case("nvptx64", Triple::nvptx64)
+          .Case("amdil", Triple::amdil)
+          .Case("amdil64", Triple::amdil64)
+          .Case("hsail", Triple::hsail)
+          .Case("hsail64", Triple::hsail64)
+          .Case("spir", Triple::spir)
+          .Case("spir64", Triple::spir64)
+          .Cases({"spirv", "spirv1.5", "spirv1.6"}, Triple::spirv)
+          .Cases({"spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
+                  "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", "spirv32v1.6"},
+                 Triple::spirv32)
+          .Cases({"spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
+                  "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", "spirv64v1.6"},
+                 Triple::spirv64)
+          .StartsWith("kalimba", Triple::kalimba)
+          .Case("lanai", Triple::lanai)
+          .Case("renderscript32", Triple::renderscript32)
+          .Case("renderscript64", Triple::renderscript64)
+          .Case("shave", Triple::shave)
+          .Case("ve", Triple::ve)
+          .Case("wasm32", Triple::wasm32)
+          .Case("wasm64", Triple::wasm64)
+          .Case("csky", Triple::csky)
+          .Case("loongarch32", Triple::loongarch32)
+          .Case("loongarch64", Triple::loongarch64)
+          .Cases({"dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
+                  "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8",
+                  "dxilv1.9"},
+                 Triple::dxil)
+          .Case("xtensa", Triple::xtensa)
+          .Default(Triple::UnknownArch);
 
   // Some architectures require special parsing logic just to compute the
   // ArchType result.
@@ -1071,7 +1073,7 @@ Triple::Triple(std::string &&Str) : Data(std::move(Str)) {
               .StartsWith("mips64", Triple::GNUABI64)
               .StartsWith("mipsisa64", Triple::GNUABI64)
               .StartsWith("mipsisa32", Triple::GNU)
-              .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU)
+              .Cases({"mips", "mipsel", "mipsr6", "mipsr6el"}, Triple::GNU)
               .Default(UnknownEnvironment);
     }
   }
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 1932a3a..e382cfe 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -378,6 +378,7 @@ constexpr ProcInfo Processors[] = {
   { {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
   // Pantherlake microarchitecture based processors.
   { {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
+  { {"wildcatlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
   // Sierraforest microarchitecture based processors.
   { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
   // Grandridge microarchitecture based processors.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 7c78eb3..444b390 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -396,9 +396,8 @@ class CHR {
 
 } // end anonymous namespace
 
-static inline
-raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
-                                              const CHRStats &Stats) {
+[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS,
+                                                       const CHRStats &Stats) {
   Stats.print(OS);
   return OS;
 }
@@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
   return PSI.isFunctionEntryHot(&F);
 }
 
-static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
-                                         CHRStats *Stats) {
+[[maybe_unused]] static void dumpIR(Function &F, const char *Label,
+                                    CHRStats *Stats) {
   StringRef FuncName = F.getName();
   StringRef ModuleName = F.getParent()->getName();
   (void)(FuncName); // Unused in release build.
@@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
 }
 
 // Assert that all the CHR regions of the scope have a biased branch or select.
-static void LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]] static void
 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
 #ifndef NDEBUG
   auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
@@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
 
 // Assert that all the condition values of the biased branches and selects have
 // been hoisted to the pre-entry block or outside of the scope.
-static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
-    CHRScope *Scope, BasicBlock *PreEntryBlock) {
+[[maybe_unused]] static void
+assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+                                     BasicBlock *PreEntryBlock) {
   CHR_DEBUG(dbgs() << "Biased regions condition values \n");
   for (RegInfo &RI : Scope->CHRRegions) {
     Region *R = RI.R;
@@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
   }
 }
 
-static void LLVM_ATTRIBUTE_UNUSED
-dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes,
+                                        const char *Label) {
   dbgs() << Label << " " << Scopes.size() << "\n";
   for (CHRScope *Scope : Scopes) {
     dbgs() << *Scope << "\n";
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 09db464..386e48f 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
 
 namespace llvm {
 
-raw_ostream &operator<<(raw_ostream &OS,
-                        BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h
index 232db2b..5cc4212 100644
--- a/llvm/lib/Transforms/ObjCARC/PtrState.h
+++ b/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -47,8 +47,7 @@ enum Sequence {
   S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
 };
 
-raw_ostream &operator<<(raw_ostream &OS,
-                        const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S);
 
 /// Unidirectional information about either a
 /// retain-decrement-use-release sequence or release-use-decrement-retain
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 7ad710d..6141b6d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -77,6 +77,7 @@
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
   return nullptr;
 }
 
-namespace {
 // Returns true if \p I is an intrinsic that does not read or write memory.
-bool isNoopIntrinsic(Instruction *I) {
+static bool isNoopIntrinsic(Instruction *I) {
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::lifetime_start:
@@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) {
 }
 
 // Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
   Instruction *DI = D->getMemoryInst();
   // Calls that only access inaccessible memory cannot read or write any memory
   // locations we consider for elimination.
@@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
   return false;
 }
 
+namespace {
+
 // A memory location wrapper that represents a MemoryLocation, `MemLoc`,
 // defined by `MemDef`.
 struct MemoryLocationWrapper {
@@ -889,23 +891,25 @@ struct MemoryDefWrapper {
   SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
 };
 
-bool hasInitializesAttr(Instruction *I) {
-  CallBase *CB = dyn_cast<CallBase>(I);
-  return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
-}
-
 struct ArgumentInitInfo {
   unsigned Idx;
   bool IsDeadOrInvisibleOnUnwind;
   ConstantRangeList Inits;
 };
+} // namespace
+
+static bool hasInitializesAttr(Instruction *I) {
+  CallBase *CB = dyn_cast<CallBase>(I);
+  return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
+}
 
 // Return the intersected range list of the initializes attributes of "Args".
 // "Args" are call arguments that alias to each other.
 // If any argument in "Args" doesn't have dead_on_unwind attr and
 // "CallHasNoUnwindAttr" is false, return empty.
-ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
-                                              bool CallHasNoUnwindAttr) {
+static ConstantRangeList
+getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
+                            bool CallHasNoUnwindAttr) {
   if (Args.empty())
     return {};
 
@@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
   return IntersectedIntervals;
 }
 
+namespace {
+
 struct DSEState {
   Function &F;
   AliasAnalysis &AA;
@@ -2328,10 +2334,11 @@ struct DSEState {
   // change state: whether make any change.
   bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
 };
+} // namespace
 
 // Return true if "Arg" is function local and isn't captured before "CB".
-bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
-                               EarliestEscapeAnalysis &EA) {
+static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
+                                      EarliestEscapeAnalysis &EA) {
   const Value *UnderlyingObj = getUnderlyingObject(Arg);
   return isIdentifiedFunctionLocal(UnderlyingObj) &&
          capturesNothing(
@@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 
   return MadeChange;
 }
-} // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
 // DSE Pass
@@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                     false)
 
-namespace llvm {
-LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
   return new DSELegacyPass();
 }
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 1c88532..b9534def 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -73,24 +73,17 @@
 #include <utility>
 
 using namespace llvm;
+using namespace llvm::GVNExpression;
 
 #define DEBUG_TYPE "gvn-sink"
 
 STATISTIC(NumRemoved, "Number of instructions removed");
 
-namespace llvm {
-namespace GVNExpression {
-
 LLVM_DUMP_METHOD void Expression::dump() const {
   print(dbgs());
   dbgs() << "\n";
 }
 
-} // end namespace GVNExpression
-} // end namespace llvm
-
-namespace {
-
 static bool isMemoryInst(const Instruction *I) {
   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
@@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) {
 
 //===----------------------------------------------------------------------===//
 
+namespace {
+
 /// Candidate solution for sinking. There may be different ways to
 /// sink instructions, differing in the number of instructions sunk,
 /// the number of predecessors sunk from and the number of PHIs
@@ -125,14 +120,6 @@ struct SinkingInstructionCandidate {
   }
 };
 
-#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
-  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
-     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
-  return OS;
-}
-#endif
-
 //===----------------------------------------------------------------------===//
 
 /// Describes a PHI node that may or may not exist. These track the PHIs
@@ -256,8 +243,18 @@ public:
     return Values == Other.Values && Blocks == Other.Blocks;
   }
 };
+} // namespace
 
-template <typename ModelledPHI> struct DenseMapInfo {
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+                               const SinkingInstructionCandidate &C) {
+  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+  return OS;
+}
+#endif
+
+template <> struct llvm::DenseMapInfo<ModelledPHI> {
   static inline ModelledPHI &getEmptyKey() {
     static ModelledPHI Dummy = ModelledPHI::createDummy(0);
     return Dummy;
@@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo {
   }
 };
 
-using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
+using ModelledPHISet = DenseSet<ModelledPHI>;
+
+namespace {
 
 //===----------------------------------------------------------------------===//
 //                             ValueTable
@@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
 ///
 /// This class also contains fields for discriminators used when determining
 /// equivalence of instructions with sideeffects.
-class InstructionUseExpr : public GVNExpression::BasicExpression {
+class InstructionUseExpr : public BasicExpression {
   unsigned MemoryUseOrder = -1;
   bool Volatile = false;
   ArrayRef<int> ShuffleMask;
@@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression {
 public:
   InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                      BumpPtrAllocator &A)
-      : GVNExpression::BasicExpression(I->getNumUses()) {
+      : BasicExpression(I->getNumUses()) {
     allocateOperands(R, A);
     setOpcode(I->getOpcode());
     setType(I->getType());
@@ -315,8 +314,8 @@ public:
   void setVolatile(bool V) { Volatile = V; }
 
   hash_code getHashValue() const override {
-    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
-                        MemoryUseOrder, Volatile, ShuffleMask);
+    return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder,
+                        Volatile, ShuffleMask);
   }
 
   template <typename Function> hash_code getHashValue(Function MapFn) {
@@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
 
 class ValueTable {
   DenseMap<Value *, uint32_t> ValueNumbering;
-  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+  DenseMap<Expression *, uint32_t> ExpressionNumbering;
   DenseMap<size_t, uint32_t> HashNumbering;
   BumpPtrAllocator Allocator;
   ArrayRecycler<Value *> Recycler;
@@ -594,6 +593,7 @@ private:
     }
   }
 };
+} // namespace
 
 std::optional<SinkingInstructionCandidate>
 GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
@@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
   NumRemoved += Insts.size() - 1;
 }
 
-} // end anonymous namespace
-
 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
   GVNSink G;
   if (!G.run(F))
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index d99f1eb..ddb99a5 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -75,8 +75,6 @@ static cl::opt<bool>
                                "expressed as branches by widenable conditions"),
                       cl::init(true));
 
-namespace {
-
 // Get the condition of \p I. It can either be a guard or a conditional branch.
 static Value *getCondition(Instruction *I) {
   if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
@@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) {
   return std::nullopt;
 }
 
+namespace {
+
 class GuardWideningImpl {
   DominatorTree &DT;
   PostDominatorTree *PDT;
@@ -328,7 +328,7 @@ public:
   /// The entry point for this pass.
   bool run();
 };
-}
+} // namespace
 
 static bool isSupportedGuardInstruction(const Instruction *Insn) {
   if (isGuard(Insn))
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 3c14036e..6fb8197 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,8 +26,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-
 static cl::opt<unsigned>
     JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
                            cl::desc("Only split jump tables with size less or "
@@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
              "or equal than this threshold."),
     cl::init(50));
 
+namespace llvm {
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
 } // end namespace llvm
 
 #define DEBUG_TYPE "jump-table-to-switch"
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 9655173..b2c526b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted,
 STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
                                     "reassociated and hoisted out of the loop");
 
-namespace llvm {
-
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
 // which may not be precise, since optimizeUses is capped. The result is
 // correct, but we may not get as "far up" as possible to get which access is
 // clobbering the one queried.
-cl::opt<unsigned> SetLicmMssaOptCap(
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
              "for faster compile. Caps the MemorySSA clobbering calls."));
@@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap(
 // Experimentally, memory promotion carries less importance than sinking and
 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
 // compile time.
-cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
              "effect. When MSSA in LICM is enabled, then this is the maximum "
              "number of accesses allowed to be present in a loop in order to "
              "enable memory promotion."));
 
+namespace llvm {
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
 } // end namespace llvm
 
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
@@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
   return false;
 }
 
-namespace {
 /// Return true if-and-only-if we know how to (mechanically) both hoist and
 /// sink a given instruction out of a loop.  Does not address legality
 /// concerns such as aliasing or speculation safety.
-bool isHoistableAndSinkableInst(Instruction &I) {
+static bool isHoistableAndSinkableInst(Instruction &I) {
   // Only these instructions are hoistable/sinkable.
   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
@@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) {
 }
 
 /// Return true if I is the only Instruction with a MemoryAccess in L.
-bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
-                        const MemorySSAUpdater &MSSAU) {
+static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+                               const MemorySSAUpdater &MSSAU) {
   for (auto *BB : L->getBlocks())
     if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
       int NotAPhi = 0;
@@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
     }
   return true;
 }
-}
 
 static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
                                                BatchAAResults &BAA,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 73f1942..7706de8 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -21,8 +21,7 @@
 
 #define DEBUG_TYPE "loop-bound-split"
 
-namespace llvm {
-
+using namespace llvm;
 using namespace PatternMatch;
 
 namespace {
@@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
   IRBuilder<> Builder(&PostLoopPreHeader->front());
 
   // Update phi nodes in header of post-loop.
-  bool isExitingLatch =
-      (L.getExitingBlock() == L.getLoopLatch()) ? true : false;
+  bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch();
   Value *ExitingCondLCSSAPhi = nullptr;
   for (PHINode &PN : L.getHeader()->phis()) {
     // Create LCSSA phi node in preheader of post-loop.
@@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
 PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
                                           LoopStandardAnalysisResults &AR,
                                           LPMUpdater &U) {
-  Function &F = *L.getHeader()->getParent();
-  (void)F;
+  [[maybe_unused]] Function &F = *L.getHeader()->getParent();
 
   LLVM_DEBUG(dbgs() << "Spliting bound of loop in " << F.getName() << ": " << L
                     << "\n");
@@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
 
   return getLoopPassPreservedAnalyses();
 }
-
-} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 578fec7..a692009 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -551,12 +551,10 @@ public:
   }
 
   /// Support comparison with a single offset to allow binary searches.
-  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
-                                              uint64_t RHSOffset) {
+  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
     return LHS.beginOffset() < RHSOffset;
   }
-  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
-                                              const Slice &RHS) {
+  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
     return LHSOffset < RHS.beginOffset();
   }
 
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b18acea..4fe736a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
       }
 
       Phi.replaceAllUsesWith(RdxResult);
-      continue;
     }
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b96d29e..62a81ba 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // the vector loop or when not folding the tail. In the later case, we know
   // that the canonical induction increment will not overflow as the vector trip
   // count is >= increment and a multiple of the increment.
+  VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
   bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
   if (!HasNUW) {
-    auto *IVInc = Plan->getVectorLoopRegion()
-                      ->getExitingBasicBlock()
-                      ->getTerminator()
-                      ->getOperand(0);
-    assert(match(IVInc, m_VPInstruction<Instruction::Add>(
-                            m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
+    auto *IVInc =
+        LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
+    assert(match(IVInc,
+                 m_VPInstruction<Instruction::Add>(
+                     m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
            "Did not find the canonical IV increment");
     cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
   }
@@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
 
   // Scan the body of the loop in a topological order to visit each basic block
   // after having visited its predecessor basic blocks.
-  VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
   VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       HeaderVPBB);
@@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   for (VPValue *Old : Old2New.keys())
     Old->getDefiningRecipe()->eraseFromParent();
 
-  assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
-         !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
+  assert(isa<VPRegionBlock>(LoopRegion) &&
+         !LoopRegion->getEntryBasicBlock()->empty() &&
          "entry block must be set to a VPRegionBlock having a non-empty entry "
          "VPBasicBlock");
 
@@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   if (ResumePhiIter == MainScalarPH->phis().end()) {
     VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
     ResumePhi = ScalarPHBuilder.createScalarPhi(
-        {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
-        "vec.epilog.resume.val");
+        {VectorTC,
+         MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()},
+        {}, "vec.epilog.resume.val");
   } else {
     ResumePhi = cast<VPPhi>(&*ResumePhiIter);
     if (MainScalarPH->begin() == MainScalarPH->end())
@@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
   VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
   Header->setName("vec.epilog.vector.body");
 
-  VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+  VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV();
   // When vectorizing the epilogue loop, the canonical induction needs to be
   // adjusted by the value after the main vector loop. Find the resume value
   // created during execution of the main VPlan. It must be the first phi in the
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23f5623..0e0b042 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,8 @@ public:
     // part if scalar. In the latter case, the recipe will be removed during
     // unrolling.
     ExtractLastElement,
+    // Extracts the last lane for each part from its operand.
+    ExtractLastLanePerPart,
     // Extracts the second-to-last lane from its operand or the second-to-last
     // part if it is scalar. In the latter case, the recipe will be removed
     // during unrolling.
@@ -4058,6 +4060,19 @@ public:
   /// Remove the current region from its VPlan, connecting its predecessor to
   /// its entry, and its exiting block to its successor.
   void dissolveToCFGLoop();
+
+  /// Returns the canonical induction recipe of the region.
+  VPCanonicalIVPHIRecipe *getCanonicalIV() {
+    VPBasicBlock *EntryVPBB = getEntryBasicBlock();
+    if (EntryVPBB->empty()) {
+      // VPlan native path. TODO: Unify both code paths.
+      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
+    }
+    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
+  }
+  const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
+    return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
+  }
 };
 
 /// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4252,12 +4267,14 @@ public:
       BackedgeTakenCount = new VPValue();
     return BackedgeTakenCount;
   }
+  VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
 
   /// The vector trip count.
   VPValue &getVectorTripCount() { return VectorTripCount; }
 
   /// Returns the VF of the vector loop region.
   VPValue &getVF() { return VF; };
+  const VPValue &getVF() const { return VF; };
 
   /// Returns VF * UF of the vector loop region.
   VPValue &getVFxUF() { return VFxUF; }
@@ -4369,16 +4386,6 @@ public:
   LLVM_DUMP_METHOD void dump() const;
 #endif
 
-  /// Returns the canonical induction recipe of the vector loop.
-  VPCanonicalIVPHIRecipe *getCanonicalIV() {
-    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
-    if (EntryVPBB->empty()) {
-      // VPlan native path.
-      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
-    }
-    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
-  }
-
   VPValue *getSCEVExpansion(const SCEV *S) const {
     return SCEVToExpansion.lookup(S);
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a..f413c63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   case VPInstruction::FirstActiveLane:
     return Type::getIntNTy(Ctx, 64);
   case VPInstruction::ExtractLastElement:
+  case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractPenultimateElement: {
     Type *BaseTy = inferScalarType(R->getOperand(0));
     if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c0147ce..332791a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
   }
 
   VPIRMetadata VPBranchWeights;
-  auto *Term = VPBuilder(CheckBlockVPBB)
-                   .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
-                                 Plan.getCanonicalIV()->getDebugLoc());
+  auto *Term =
+      VPBuilder(CheckBlockVPBB)
+          .createNaryOp(
+              VPInstruction::BranchOnCond, {CondVPV},
+              Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
   if (AddBranchWeights) {
     MDBuilder MDB(Plan.getContext());
     MDNode *BranchWeights =
@@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
     if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
       if (DerivedIV->getNumUsers() == 1 &&
           DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
-        auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
-                                            &Plan.getVectorTripCount());
+        auto *NewSel = Builder.createSelect(
+            AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount());
         DerivedIV->moveAfter(&*Builder.getInsertPoint());
         DerivedIV->setOperand(1, NewSel);
         continue;
@@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
                            "FMaxNum/FMinNum reduction.\n");
       return false;
     }
-    auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
+    auto *NewSel =
+        Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV);
     ResumeR->setOperand(0, NewSel);
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b42b049..ff286f7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -372,6 +372,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
   return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
 }
 
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
+m_ExtractLastLanePerPart(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0);
+}
+
 template <typename Op0_t, typename Op1_t, typename Op2_t>
 inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
 m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
@@ -394,6 +400,12 @@ m_AnyOf(const Op0_t &Op0) {
   return m_VPInstruction<VPInstruction::AnyOf>(Op0);
 }
 
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t>
+m_FirstActiveLane(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
+}
+
 template <unsigned Opcode, typename Op0_t>
 inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
   return AllRecipe_match<Opcode, Op0_t>(Op0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 0c27d53..fb17d5d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
   // non-phi instructions.
 
   auto &Plan = *HeaderVPBB->getPlan();
-  auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+  auto *IV =
+      new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV());
   Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
   Builder.insert(IV);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2368d18..775837f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
   case VPInstruction::CanonicalIVIncrementForPart:
   case VPInstruction::ExplicitVectorLength:
   case VPInstruction::ExtractLastElement:
+  case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::FirstActiveLane:
   case VPInstruction::Not:
@@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
 
     return ReducedPartRdx;
   }
+  case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractLastElement:
   case VPInstruction::ExtractPenultimateElement: {
-    unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
+    unsigned Offset =
+        getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;
     Value *Res;
     if (State.VF.isVector()) {
       assert(Offset <= State.VF.getKnownMinValue() &&
@@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
 
 bool VPInstruction::isVectorToScalar() const {
   return getOpcode() == VPInstruction::ExtractLastElement ||
+         getOpcode() == VPInstruction::ExtractLastLanePerPart ||
          getOpcode() == VPInstruction::ExtractPenultimateElement ||
          getOpcode() == Instruction::ExtractElement ||
          getOpcode() == VPInstruction::ExtractLane ||
@@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::CanonicalIVIncrementForPart:
   case VPInstruction::ExtractLane:
   case VPInstruction::ExtractLastElement:
+  case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::ActiveLaneMask:
   case VPInstruction::FirstActiveLane:
@@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ExtractLastElement:
     O << "extract-last-element";
     break;
+  case VPInstruction::ExtractLastLanePerPart:
+    O << "extract-last-lane-per-part";
+    break;
   case VPInstruction::ExtractPenultimateElement:
     O << "extract-penultimate-element";
     break;
@@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
     return false;
   auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
-  auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
+  auto *CanIV = getParent()->getParent()->getCanonicalIV();
   return StartC && StartC->isZero() && StepC && StepC->isOne() &&
          getScalarType() == CanIV->getScalarType();
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 40b7e8d..8d76b2d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
 /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
 /// recipe, if it exists.
 static void removeRedundantCanonicalIVs(VPlan &Plan) {
-  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
   VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
   for (VPUser *U : CanonicalIV->users()) {
     WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
@@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) {
   if (!WidenNewIV)
     return;
 
-  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
 
@@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                     FPMathOperator *FPBinOp, Instruction *TruncI,
                     VPValue *StartV, VPValue *Step, DebugLoc DL,
                     VPBuilder &Builder) {
-  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
-  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
+  VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
   VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
       Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
 
@@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
                                                ScalarEvolution &SE) {
   VPValue *Incoming, *Mask;
   if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
-                     m_VPInstruction<VPInstruction::FirstActiveLane>(
-                         m_VPValue(Mask)),
-                     m_VPValue(Incoming))))
+                     m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming))))
     return nullptr;
 
   auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
     return nullptr;
 
   // Calculate the final index.
-  VPValue *EndValue = Plan.getCanonicalIV();
-  auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  auto *CanonicalIV = LoopRegion->getCanonicalIV();
+  Type *CanonicalIVType = CanonicalIV->getScalarType();
   VPBuilder B(cast<VPBasicBlock>(PredVPBB));
 
   DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
@@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
   Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane);
   FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                               FirstActiveLaneType, DL);
-  EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
+  VPValue *EndValue =
+      B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
 
   // `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
   // changed it means the exit is using the incremented value, so we need to
@@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   }
 
   // Look through ExtractLastElement (BuildVector ....).
-  if (match(&R, m_ExtractLastElement(m_BuildVector()))) {
+  if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+                            m_ExtractLastLanePerPart(m_BuildVector())))) {
     auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
     Def->replaceAllUsesWith(
         BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
-  if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) {
+  if (match(Def,
+            m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
+                        m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
     Def->replaceAllUsesWith(A);
     return;
   }
 
-  if (match(Def,
-            m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) &&
+  if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
+                             m_ExtractLastLanePerPart(m_VPValue(A)))) &&
       ((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
        (isa<VPReplicateRecipe>(A) &&
         cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
              [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
     return Def->replaceAllUsesWith(A);
   }
+
+  if (Plan->getUF() == 1 &&
+      match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
+    return Def->replaceAllUsesWith(
+        Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
+  }
 }
 
 void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
             RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
             true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
         Clone->insertBefore(RepOrWidenR);
-        auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement,
-                                      {Clone->getOperand(0)});
+        unsigned ExtractOpc =
+            vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
+                ? VPInstruction::ExtractLastElement
+                : VPInstruction::ExtractLastLanePerPart;
+        auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
         Ext->insertBefore(Clone);
         Clone->setOperand(0, Ext);
         RepR->eraseFromParent();
@@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
           !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
             return U->usesScalars(RepOrWidenR) ||
                    match(cast<VPRecipeBase>(U),
-                         m_ExtractLastElement(m_VPValue()));
+                         m_CombineOr(m_ExtractLastElement(m_VPValue()),
+                                     m_ExtractLastLanePerPart(m_VPValue())));
           }))
         continue;
 
@@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
       return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
     });
 
-  auto *CanIV = Plan.getCanonicalIV();
+  auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV();
   if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ,
                                   m_Specific(CanIV->getBackedgeValue()),
                                   m_Specific(&Plan.getVectorTripCount()))))
@@ -2319,7 +2334,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
     VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
   VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
-  auto *CanonicalIVPHI = Plan.getCanonicalIV();
+  auto *CanonicalIVPHI = TopRegion->getCanonicalIV();
   VPValue *StartV = CanonicalIVPHI->getStartValue();
 
   auto *CanonicalIVIncrement =
@@ -2358,7 +2373,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
 
   // Create the active lane mask instruction in the VPlan preheader.
   VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
-      ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+      ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1));
   auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
                                         {EntryIncrement, TC, ALMMultiplier}, DL,
                                         "active.lane.mask.entry");
@@ -2394,13 +2409,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
 /// TODO: Introduce explicit recipe for header-mask instead of searching
 /// for the header-mask pattern manually.
 static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   SmallVector<VPValue *> WideCanonicalIVs;
-  auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
-                                            IsaPred<VPWidenCanonicalIVRecipe>);
-  assert(count_if(Plan.getCanonicalIV()->users(),
+  auto *FoundWidenCanonicalIVUser = find_if(
+      LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
+  assert(count_if(LoopRegion->getCanonicalIV()->users(),
                   IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
          "Must have at most one VPWideCanonicalIVRecipe");
-  if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
+  if (FoundWidenCanonicalIVUser !=
+      LoopRegion->getCanonicalIV()->users().end()) {
     auto *WideCanonicalIV =
         cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
     WideCanonicalIVs.push_back(WideCanonicalIV);
@@ -2408,7 +2425,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
 
   // Also include VPWidenIntOrFpInductionRecipes that represent a widened
   // version of the canonical induction.
-  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
     if (WidenOriginalIV && WidenOriginalIV->isCanonical())
@@ -2441,8 +2458,9 @@ void VPlanTransforms::addActiveLaneMask(
          "DataAndControlFlowWithoutRuntimeCheck implies "
          "UseActiveLaneMaskForControlFlow");
 
-  auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
-                                            IsaPred<VPWidenCanonicalIVRecipe>);
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  auto *FoundWidenCanonicalIVUser = find_if(
+      LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
   assert(FoundWidenCanonicalIVUser &&
          "Must have widened canonical IV when tail folding!");
   VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
@@ -2455,7 +2473,7 @@ void VPlanTransforms::addActiveLaneMask(
   } else {
     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
     VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
-        ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+        ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1));
     LaneMask =
         B.createNaryOp(VPInstruction::ActiveLaneMask,
                        {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2565,9 +2583,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   });
 
   assert(all_of(Plan.getVFxUF().users(),
-                [&Plan](VPUser *U) {
-                  return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
-                                          m_Specific(&Plan.getVFxUF()))) ||
+                [&LoopRegion, &Plan](VPUser *U) {
+                  return match(U,
+                               m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
+                                       m_Specific(&Plan.getVFxUF()))) ||
                          isa<VPWidenPointerInductionRecipe>(U);
                 }) &&
          "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
@@ -2722,9 +2741,10 @@ void VPlanTransforms::addExplicitVectorLength(
     VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
   if (Plan.hasScalarVFOnly())
     return;
-  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
 
-  auto *CanonicalIVPHI = Plan.getCanonicalIV();
+  auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
   auto *CanIVTy = CanonicalIVPHI->getScalarType();
   VPValue *StartV = CanonicalIVPHI->getStartValue();
 
@@ -4164,7 +4184,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
 
   // Adjust induction to reflect that the transformed plan only processes one
   // original iteration.
-  auto *CanIV = Plan.getCanonicalIV();
+  auto *CanIV = VectorLoop->getCanonicalIV();
   auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
   VPBuilder PHBuilder(Plan.getVectorPreheader());
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 1c4adfc..5aeda3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -69,7 +69,8 @@ class UnrollState {
                                 VPBasicBlock::iterator InsertPtForPhi);
 
   VPValue *getConstantVPV(unsigned Part) {
-    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+    Type *CanIVIntTy =
+        Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
     return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
   }
 
@@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
     // Compute*Result which combine all parts to compute the final value.
     VPValue *Op1;
     if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
-        match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
-                      m_VPValue(Op1))) ||
+        match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
         match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                       m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
         match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 66748c5..8b1b0e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
   return Expanded;
 }
 
-bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
+bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
   if (isa<VPActiveLaneMaskPHIRecipe>(V))
     return true;
 
@@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
 
   if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
     return B == Plan.getTripCount() &&
-           (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(),
-                                     m_Specific(&Plan.getVF()))) ||
+           (match(A,
+                  m_ScalarIVSteps(
+                      m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+                      m_One(), m_Specific(&Plan.getVF()))) ||
             IsWideCanonicalIV(A));
 
   return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
-         B == Plan.getOrCreateBackedgeTakenCount();
+         B == Plan.getBackedgeTakenCount();
 }
 
 const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
@@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
     return all_of(R->operands(), isUniformAcrossVFsAndUFs);
   }
 
-  auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
+  auto *CanonicalIV =
+      R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
   // Canonical IV chain is uniform.
   if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
     return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0a..cf95ac0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
 }
 
 /// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(const VPValue *V, VPlan &Plan);
+bool isHeaderMask(const VPValue *V, const VPlan &Plan);
 
 /// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
 /// as such if it is either loop invariant (defined outside the vector region)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5262af6..91734a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
         return false;
       }
     }
-    if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
-      if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength &&
-          !verifyEVLRecipe(*EVL)) {
-        errs() << "EVL VPValue is not used correctly\n";
-        return false;
+    if (const auto *VPI = dyn_cast<VPInstruction>(&R)) {
+      switch (VPI->getOpcode()) {
+      case VPInstruction::ExplicitVectorLength:
+        if (!verifyEVLRecipe(*VPI)) {
+          errs() << "EVL VPValue is not used correctly\n";
+          return false;
+        }
+        break;
+      default:
+        break;
       }
     }
   }
diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll
index f8b30df..56d762b 100644
--- a/llvm/test/Analysis/BasicAA/intrinsics.ll
+++ b/llvm/test/Analysis/BasicAA/intrinsics.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -aa-pipeline=basic-aa -passes=gvn -S < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
@@ -5,12 +6,15 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 ; BasicAA should prove that these calls don't interfere, since they are
 ; IntrArgReadMem and have noalias pointers.
 
-; CHECK:      define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[ATTR:#[0-9]+]]
-; CHECK-NEXT:   call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m)
-; CHECK-NEXT:   %c = add <8 x i16> %a, %a
 define <8 x i16> @test0(ptr noalias %p, ptr noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) {
+; CHECK-LABEL: define <8 x i16> @test0(
+; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], <8 x i16> [[Y:%.*]], <8 x i1> [[M:%.*]], <8 x i16> [[PT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[P]], i32 16, <8 x i1> [[M]], <8 x i16> [[PT]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr [[Q]], i32 16, <8 x i1> [[M]])
+; CHECK-NEXT:    [[C:%.*]] = add <8 x i16> [[A]], [[A]]
+; CHECK-NEXT:    ret <8 x i16> [[C]]
+;
 entry:
   %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind
   call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m)
@@ -24,4 +28,3 @@ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) nounwind
 
 ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
 ; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
-; CHECK: attributes [[ATTR]] = { nounwind }
diff --git a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll
index 7e980c9..ffd8259 100644
--- a/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll
+++ b/llvm/test/Analysis/BasicAA/scalable-dse-aa.ll
@@ -1,10 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
 
 define <vscale x 4 x float> @dead_scalable_store(ptr %0) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store(
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
-; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.32, ptr nonnull %gep.arr.32, i32 1, <vscale x 4 x i1> %mask)
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[ARR:%.*]] = alloca [64 x i32], align 4
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
+; CHECK-NEXT:    [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
+; CHECK-NEXT:    [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48
+; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[FADD]]
 ;
   %arr = alloca [64 x i32], align 4
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
@@ -34,9 +47,21 @@ define <vscale x 4 x float> @dead_scalable_store(ptr %0) {
 
 define <4 x float> @dead_scalable_store_fixed(ptr %0) {
 ; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed(
-; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <4 x i1> %mask)
-; CHECK-NOT: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.32, ptr nonnull %gep.arr.36, i32 1, <4 x i1> %mask2)
-; CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <4 x i1> %mask)
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[ARR:%.*]] = alloca [64 x i32], align 4
+; CHECK-NEXT:    [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
+; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
+; CHECK-NEXT:    [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
+; CHECK-NEXT:    [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48
+; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]])
+; CHECK-NEXT:    [[LOAD_0_48:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]])
+; CHECK-NEXT:    [[FADDOP0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADDOP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <4 x float> [[FADDOP0]], [[FADDOP1]]
+; CHECK-NEXT:    ret <4 x float> [[FADD]]
 ;
   %arr = alloca [64 x i32], align 4
   %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
@@ -67,9 +92,25 @@ define <4 x float> @dead_scalable_store_fixed(ptr %0) {
 
 define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) {
 ; CHECK-LABEL: define <vscale x 4 x float> @scalable_store_partial_overwrite(
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask)
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.48, ptr nonnull %gep.arr.48, i32 1, <vscale x 4 x i1> %mask)
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[ARR:%.*]] = alloca [64 x i32], align 4
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[GEP_0_30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 30
+; CHECK-NEXT:    [[GEP_0_48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
+; CHECK-NEXT:    [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
+; CHECK-NEXT:    [[GEP_ARR_30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 30
+; CHECK-NEXT:    [[GEP_ARR_48:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 48
+; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[LOAD_0_30:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_30]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_30]], ptr nonnull [[GEP_ARR_30]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[LOAD_0_48:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_48]], ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_48]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[FADD]]
 ;
   %arr = alloca [64 x i32], align 4
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
@@ -99,9 +140,23 @@ define <vscale x 4 x float> @scalable_store_partial_overwrite(ptr %0) {
 
 define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalable_store_small_mask(
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.16, ptr nonnull %gep.arr.16, i32 1, <vscale x 4 x i1> %mask)
-; CHECK-NOT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.30, ptr nonnull %gep.arr.30, i32 1, <vscale x 4 x i1> %mask)
-; CHECK: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %load.0.46, ptr nonnull %gep.arr.46, i32 1, <vscale x 4 x i1> %mask)
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[ARR:%.*]] = alloca [64 x i32], align 4
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
+; CHECK-NEXT:    [[GEP_0_16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[GEP_0_46:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 46
+; CHECK-NEXT:    [[GEP_ARR_16:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 16
+; CHECK-NEXT:    [[GEP_ARR_46:%.*]] = getelementptr inbounds nuw i8, ptr [[ARR]], i64 46
+; CHECK-NEXT:    [[LOAD_0_16:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_16]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_16]], ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[LOAD_0_46:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_0_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0_46]], ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[SMALLMASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 2)
+; CHECK-NEXT:    [[FADDOP0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_16]], i32 1, <vscale x 4 x i1> [[SMALLMASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADDOP1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[GEP_ARR_46]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <vscale x 4 x float> [[FADDOP0]], [[FADDOP1]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[FADD]]
+;
   %arr = alloca [64 x i32], align 4
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
 
@@ -131,7 +186,12 @@ define <vscale x 4 x float> @dead_scalable_store_small_mask(ptr %0) {
 
 define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store(
-; CHECK-NOT: store i32 20, ptr %gep.1.12
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4)
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RETVAL]]
 ;
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i128(i128 0, i128 4)
   %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12
@@ -144,10 +204,17 @@ define <vscale x 4 x float> @dead_scalar_store(ptr noalias %0, ptr %1) {
 }
 
 
-; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask(
-; CHECK-NOT: store i32 20, ptr %1
-; CHECK: store i32 50, ptr %gep.5
 define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1) {
+; CHECK-LABEL: define <4 x float> @dead_scalable_store_fixed_large_mask(
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7)
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 5
+; CHECK-NEXT:    store i32 50, ptr [[GEP_5]], align 4
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP0]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull [[TMP1]], i32 1, <4 x i1> [[MASK]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <4 x float> [[RETVAL]]
+;
   %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 7)
   store i32 20, ptr %1
 
@@ -164,8 +231,16 @@ define <4 x float> @dead_scalable_store_fixed_large_mask(ptr noalias %0, ptr %1)
 ; This get active lane mask may cover 4 or 8 integers
 define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @mask_gt_minimum_num_elts(
-; CHECK: store i32 10, ptr %gep.1.12
-; CHECK: store i32 20, ptr %gep.1.28
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
+; CHECK-NEXT:    [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; CHECK-NEXT:    store i32 10, ptr [[GEP_1_12]], align 4
+; CHECK-NEXT:    [[GEP_1_28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 28
+; CHECK-NEXT:    store i32 20, ptr [[GEP_1_28]], align 4
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RETVAL]]
 ;
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 8)
   %gep.1.12 = getelementptr inbounds nuw i8, ptr %1, i64 12
@@ -182,7 +257,13 @@ define <vscale x 4 x float> @mask_gt_minimum_num_elts(ptr noalias %0, ptr %1) {
 ; Don't do anything if the mask's Op1 < Op0
 define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_lt(
-; CHECK: store i32 20, ptr %1
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2)
+; CHECK-NEXT:    store i32 20, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RETVAL]]
 ;
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 4, i32 2)
   store i32 20, ptr %1
@@ -196,7 +277,13 @@ define <vscale x 4 x float> @active_lane_mask_lt(ptr noalias %0, ptr %1) {
 ; Don't do anything if the mask's Op1 == Op0
 define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @active_lane_mask_eq(
-; CHECK: store i32 20, ptr %1
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2)
+; CHECK-NEXT:    store i32 20, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RETVAL]]
 ;
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 2, i32 2)
   store i32 20, ptr %1
@@ -209,8 +296,14 @@ define <vscale x 4 x float> @active_lane_mask_eq(ptr noalias %0, ptr %1) {
 
 define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 16 x i8> @scalar_stores_small_mask(
-; CHECK-NOT: store i8 60, ptr %gep.1.6
-; CHECK: store i8 120, ptr %gep.1.8
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 0, i8 7)
+; CHECK-NEXT:    [[GEP_1_8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
+; CHECK-NEXT:    store i8 120, ptr [[GEP_1_8]], align 1
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[LOAD_0]], ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP1]], i32 1, <vscale x 16 x i1> [[MASK]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[RETVAL]]
 ;
   %mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i8.i8(i8 0, i8 7)
   %gep.1.6 = getelementptr inbounds nuw i8, ptr %1, i64 6
@@ -226,10 +319,14 @@ define <vscale x 16 x i8> @scalar_stores_small_mask(ptr noalias %0, ptr %1) {
 
 define <vscale x 4 x float> @dead_scalar_store_offset(ptr noalias %0, ptr %1) {
 ; CHECK-LABEL: define <vscale x 4 x float> @dead_scalar_store_offset(
-; CHECK-NOT: store i32 10, ptr %gep.1.0
-; CHECK-NOT: store i32 20, ptr %gep.1.4
-; CHECK-NOT: store i32 30, ptr %gep.1.8
-; CHECK: store i32 40, ptr %gep.1.12
+; CHECK-SAME: ptr noalias [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4)
+; CHECK-NEXT:    [[GEP_1_12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; CHECK-NEXT:    store i32 40, ptr [[GEP_1_12]], align 4
+; CHECK-NEXT:    [[LOAD_0:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP0]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[LOAD_0]], ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr nonnull [[TMP1]], i32 1, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[RETVAL]]
 ;
   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 1, i32 4)
   %gep.1.0 = getelementptr inbounds nuw i8, ptr %1, i64 0
diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll
index 38bd98f..15d67489 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll
@@ -7,708 +7,708 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @ext() {
 ; CHECK-LABEL: 'ext'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %r0 = sext i1 undef to i8
-  %r1 = zext i1 undef to i8
-  %r2 = sext i1 undef to i16
-  %r3 = zext i1 undef to i16
-  %r4 = sext i1 undef to i32
-  %r5 = zext i1 undef to i32
-  %r6 = sext i1 undef to i64
-  %r7 = zext i1 undef to i64
-  %r9 = sext i8 undef to i16
-  %r10 = zext i8 undef to i16
-  %r11 = sext i8 undef to i32
-  %r12 = zext i8 undef to i32
-  %r13 = sext i8 undef to i64
-  %r14 = zext i8 undef to i64
-  %r17 = sext i16 undef to i32
-  %r18 = zext i16 undef to i32
-  %r19 = sext i16 undef to i64
-  %r20 = zext i16 undef to i64
-  %r24 = sext i32 undef to i64
-  %r25 = zext i32 undef to i64
+  %r0 = sext i1 poison to i8
+  %r1 = zext i1 poison to i8
+  %r2 = sext i1 poison to i16
+  %r3 = zext i1 poison to i16
+  %r4 = sext i1 poison to i32
+  %r5 = zext i1 poison to i32
+  %r6 = sext i1 poison to i64
+  %r7 = zext i1 poison to i64
+  %r9 = sext i8 poison to i16
+  %r10 = zext i8 poison to i16
+  %r11 = sext i8 poison to i32
+  %r12 = zext i8 poison to i32
+  %r13 = sext i8 poison to i64
+  %r14 = zext i8 poison to i64
+  %r17 = sext i16 poison to i32
+  %r18 = zext i16 poison to i32
+  %r19 = sext i16 poison to i64
+  %r20 = zext i16 poison to i64
+  %r24 = sext i32 poison to i64
+  %r25 = zext i32 poison to i64
 
-  %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-  %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-  %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-  %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-  %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-  %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-  %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-  %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-  %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-  %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-  %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-  %z2i32i64 = zext <2 x i32> undef to <2 x i64>
+  %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+  %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+  %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+  %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+  %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+  %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+  %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+  %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+  %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+  %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+  %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+  %z2i32i64 = zext <2 x i32> poison to <2 x i64>
 
-  %s4i8i16 = sext <4 x i8>  undef to <4 x i16>
-  %z4i8i16 = zext <4 x i8>  undef to <4 x i16>
-  %s4i8i32 = sext <4 x i8>  undef to <4 x i32>
-  %z4i8i32 = zext <4 x i8>  undef to <4 x i32>
-  %s4i8i64 = sext <4 x i8>  undef to <4 x i64>
-  %z4i8i64 = zext <4 x i8>  undef to <4 x i64>
-  %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-  %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-  %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-  %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-  %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-  %z4i32i64 = zext <4 x i32> undef to <4 x i64>
+  %s4i8i16 = sext <4 x i8>  poison to <4 x i16>
+  %z4i8i16 = zext <4 x i8>  poison to <4 x i16>
+  %s4i8i32 = sext <4 x i8>  poison to <4 x i32>
+  %z4i8i32 = zext <4 x i8>  poison to <4 x i32>
+  %s4i8i64 = sext <4 x i8>  poison to <4 x i64>
+  %z4i8i64 = zext <4 x i8>  poison to <4 x i64>
+  %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+  %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+  %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+  %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+  %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+  %z4i32i64 = zext <4 x i32> poison to <4 x i64>
 
-  %s8i8i16 = sext <8 x i8>  undef to <8 x i16>
-  %z8i8i16 = zext <8 x i8>  undef to <8 x i16>
-  %s8i8i32 = sext <8 x i8>  undef to <8 x i32>
-  %z8i8i32 = zext <8 x i8>  undef to <8 x i32>
-  %s8i8i64 = sext <8 x i8>  undef to <8 x i64>
-  %z8i8i64 = zext <8 x i8>  undef to <8 x i64>
-  %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-  %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-  %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-  %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-  %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-  %z8i32i64 = zext <8 x i32> undef to <8 x i64>
+  %s8i8i16 = sext <8 x i8>  poison to <8 x i16>
+  %z8i8i16 = zext <8 x i8>  poison to <8 x i16>
+  %s8i8i32 = sext <8 x i8>  poison to <8 x i32>
+  %z8i8i32 = zext <8 x i8>  poison to <8 x i32>
+  %s8i8i64 = sext <8 x i8>  poison to <8 x i64>
+  %z8i8i64 = zext <8 x i8>  poison to <8 x i64>
+  %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+  %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+  %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+  %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+  %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+  %z8i32i64 = zext <8 x i32> poison to <8 x i64>
 
-  %s16i8i16 = sext <16 x i8>  undef to <16 x i16>
-  %z16i8i16 = zext <16 x i8>  undef to <16 x i16>
-  %s16i8i32 = sext <16 x i8>  undef to <16 x i32>
-  %z16i8i32 = zext <16 x i8>  undef to <16 x i32>
-  %s16i8i64 = sext <16 x i8>  undef to <16 x i64>
-  %z16i8i64 = zext <16 x i8>  undef to <16 x i64>
-  %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-  %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-  %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-  %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-  %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-  %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+  %s16i8i16 = sext <16 x i8>  poison to <16 x i16>
+  %z16i8i16 = zext <16 x i8>  poison to <16 x i16>
+  %s16i8i32 = sext <16 x i8>  poison to <16 x i32>
+  %z16i8i32 = zext <16 x i8>  poison to <16 x i32>
+  %s16i8i64 = sext <16 x i8>  poison to <16 x i64>
+  %z16i8i64 = zext <16 x i8>  poison to <16 x i64>
+  %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+  %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+  %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+  %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+  %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+  %z16i32i64 = zext <16 x i32> poison to <16 x i64>
   ret void
 }
 
 define void @trunc() {
 ; CHECK-LABEL: 'trunc'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %r8 = trunc i8 undef to i1
-  %r15 = trunc i16 undef to i1
-  %r16 = trunc i16 undef to i8
-  %r21 = trunc i32 undef to i1
-  %r22 = trunc i32 undef to i8
-  %r23 = trunc i32 undef to i16
-  %r26 = trunc i64 undef to i1
-  %r27 = trunc i64 undef to i8
-  %r28 = trunc i64 undef to i16
-  %r29 = trunc i64 undef to i32
+  %r8 = trunc i8 poison to i1
+  %r15 = trunc i16 poison to i1
+  %r16 = trunc i16 poison to i8
+  %r21 = trunc i32 poison to i1
+  %r22 = trunc i32 poison to i8
+  %r23 = trunc i32 poison to i16
+  %r26 = trunc i64 poison to i1
+  %r27 = trunc i64 poison to i8
+  %r28 = trunc i64 poison to i16
+  %r29 = trunc i64 poison to i32
 
-  %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-  %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-  %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-  %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-  %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-  %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
+  %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+  %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+  %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+  %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+  %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+  %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
 
-  %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-  %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-  %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-  %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-  %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-  %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+  %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+  %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+  %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+  %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+  %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+  %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
 
-  %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-  %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-  %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-  %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-  %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-  %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+  %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+  %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+  %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+  %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+  %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+  %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
 
-  %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-  %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-  %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-  %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-  %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-  %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+  %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+  %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+  %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+  %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+  %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+  %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
   ret void
 }
 
 define i32 @casts_no_users() {
 ; CHECK-LABEL: 'casts_no_users'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double undef to i1
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double undef to i8
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncf64f16 = fptrunc double undef to half
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f64f16 = fptrunc <2 x double> undef to <2 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f64f16 = fptrunc <4 x double> undef to <4 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f64f16 = fptrunc <8 x double> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f64f16 = fptrunc <16 x double> undef to <16 x half>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncv32f16 = fptrunc float undef to half
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncv2f32f16 = fptrunc <2 x float> undef to <2 x half>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncv4f32f16 = fptrunc <4 x float> undef to <4 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f32f16 = fptrunc <8 x float> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f32f16 = fptrunc <16 x float> undef to <16 x half>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extf16f32 = fpext half undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x half> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x half> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x half> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x half> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extf16f64 = fpext half undef to double
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x half> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x half> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x half> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x half> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> undef to <2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> undef to <2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> undef to <2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> undef to <2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> undef to <4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> undef to <4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> undef to <8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> undef to <8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> undef to <8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> undef to <16 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> undef to <16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> undef to <16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i16> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i16> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i16> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i16> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> undef to <16 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double poison to i1
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double poison to i8
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncf64f16 = fptrunc double poison to half
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f64f16 = fptrunc <2 x double> poison to <2 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f64f16 = fptrunc <4 x double> poison to <4 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f64f16 = fptrunc <8 x double> poison to <8 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f64f16 = fptrunc <16 x double> poison to <16 x half>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncv32f16 = fptrunc float poison to half
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncv2f32f16 = fptrunc <2 x float> poison to <2 x half>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %truncv4f32f16 = fptrunc <4 x float> poison to <4 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f32f16 = fptrunc <8 x float> poison to <8 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f32f16 = fptrunc <16 x float> poison to <16 x half>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extf16f32 = fpext half poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x half> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x half> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x half> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x half> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %extf16f64 = fpext half poison to double
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x half> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x half> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x half> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x half> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> poison to <4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> poison to <4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> poison to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> poison to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> poison to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> poison to <4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> poison to <8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> poison to <8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> poison to <4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> poison to <4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i16> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i16> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i16> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i16> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-  %r30 = fptoui float undef to i1
-  %r31 = fptosi float undef to i1
-  %r32 = fptoui float undef to i8
-  %r33 = fptosi float undef to i8
-  %r34 = fptoui float undef to i16
-  %r35 = fptosi float undef to i16
-  %r36 = fptoui float undef to i32
-  %r37 = fptosi float undef to i32
-  %r38 = fptoui float undef to i64
-  %r39 = fptosi float undef to i64
-  %r40 = fptoui double undef to i1
-  %r41 = fptosi double undef to i1
-  %r42 = fptoui double undef to i8
-  %r43 = fptosi double undef to i8
-  %r44 = fptoui double undef to i16
-  %r45 = fptosi double undef to i16
-  %r46 = fptoui double undef to i32
-  %r47 = fptosi double undef to i32
-  %r48 = fptoui double undef to i64
-  %r49 = fptosi double undef to i64
-  %r50 = sitofp i1 undef to float
-  %r51 = uitofp i1 undef to float
-  %r52 = sitofp i1 undef to double
-  %r53 = uitofp i1 undef to double
-  %r54 = sitofp i8 undef to float
-  %r55 = uitofp i8 undef to float
-  %r56 = sitofp i8 undef to double
-  %r57 = uitofp i8 undef to double
-  %r58 = sitofp i16 undef to float
-  %r59 = uitofp i16 undef to float
-  %r60 = sitofp i16 undef to double
-  %r61 = uitofp i16 undef to double
-  %r62 = sitofp i32 undef to float
-  %r63 = uitofp i32 undef to float
-  %r64 = sitofp i32 undef to double
-  %r65 = uitofp i32 undef to double
-  %r66 = sitofp i64 undef to float
-  %r67 = uitofp i64 undef to float
-  %r68 = sitofp i64 undef to double
-  %r69 = uitofp i64 undef to double
-  %r80 = fptrunc double undef to float
-  %r81 = fptrunc <2 x double> undef to <2 x float>
-  %r82 = fptrunc <4 x double> undef to <4 x float>
-  %r83 = fptrunc <8 x double> undef to <8 x float>
-  %r84 = fptrunc <16 x double> undef to <16 x float>
-  %truncf64f16 = fptrunc double undef to half
-  %truncv2f64f16 = fptrunc <2 x double> undef to <2 x half>
-  %truncv4f64f16 = fptrunc <4 x double> undef to <4 x half>
-  %truncv8f64f16 = fptrunc <8 x double> undef to <8 x half>
-  %truncv16f64f16 = fptrunc <16 x double> undef to <16 x half>
-  %truncv32f16 = fptrunc float undef to half
-  %truncv2f32f16 = fptrunc <2 x float> undef to <2 x half>
-  %truncv4f32f16 = fptrunc <4 x float> undef to <4 x half>
-  %truncv8f32f16 = fptrunc <8 x float> undef to <8 x half>
-  %truncv16f32f16 = fptrunc <16 x float> undef to <16 x half>
-  %r85 = fpext float undef to double
-  %r86 = fpext <2 x float> undef to <2 x double>
-  %r87 = fpext <4 x float> undef to <4 x double>
-  %r88 = fpext <8 x float> undef to <8 x double>
-  %r89 = fpext <16 x float> undef to <16 x double>
-  %extf16f32 = fpext half undef to float
-  %extv2f16f32 = fpext <2 x half> undef to <2 x float>
-  %extv4f16f32 = fpext <4 x half> undef to <4 x float>
-  %extv8f16f32 = fpext <8 x half> undef to <8 x float>
-  %extv16f16f32 = fpext <16 x half> undef to <16 x float>
-  %extf16f64 = fpext half undef to double
-  %extv2f16f64 = fpext <2 x half> undef to <2 x double>
-  %extv4f16f64 = fpext <4 x half> undef to <4 x double>
-  %extv8f16f64 = fpext <8 x half> undef to <8 x double>
-  %extv16f16f64 = fpext <16 x half> undef to <16 x double>
-  %r90 = fptoui <2 x float> undef to <2 x i1>
-  %r91 = fptosi <2 x float> undef to <2 x i1>
-  %r92 = fptoui <2 x float> undef to <2 x i8>
-  %r93 = fptosi <2 x float> undef to <2 x i8>
-  %r94 = fptoui <2 x float> undef to <2 x i16>
-  %r95 = fptosi <2 x float> undef to <2 x i16>
-  %r96 = fptoui <2 x float> undef to <2 x i32>
-  %r97 = fptosi <2 x float> undef to <2 x i32>
-  %r98 = fptoui <2 x float> undef to <2 x i64>
-  %r99 = fptosi <2 x float> undef to <2 x i64>
-  %r100 = fptoui <2 x double> undef to <2 x i1>
-  %r101 = fptosi <2 x double> undef to <2 x i1>
-  %r102 = fptoui <2 x double> undef to <2 x i8>
-  %r103 = fptosi <2 x double> undef to <2 x i8>
-  %r104 = fptoui <2 x double> undef to <2 x i16>
-  %r105 = fptosi <2 x double> undef to <2 x i16>
-  %r106 = fptoui <2 x double> undef to <2 x i32>
-  %r107 = fptosi <2 x double> undef to <2 x i32>
-  %r108 = fptoui <2 x double> undef to <2 x i64>
-  %r109 = fptosi <2 x double> undef to <2 x i64>
+  %r30 = fptoui float poison to i1
+  %r31 = fptosi float poison to i1
+  %r32 = fptoui float poison to i8
+  %r33 = fptosi float poison to i8
+  %r34 = fptoui float poison to i16
+  %r35 = fptosi float poison to i16
+  %r36 = fptoui float poison to i32
+  %r37 = fptosi float poison to i32
+  %r38 = fptoui float poison to i64
+  %r39 = fptosi float poison to i64
+  %r40 = fptoui double poison to i1
+  %r41 = fptosi double poison to i1
+  %r42 = fptoui double poison to i8
+  %r43 = fptosi double poison to i8
+  %r44 = fptoui double poison to i16
+  %r45 = fptosi double poison to i16
+  %r46 = fptoui double poison to i32
+  %r47 = fptosi double poison to i32
+  %r48 = fptoui double poison to i64
+  %r49 = fptosi double poison to i64
+  %r50 = sitofp i1 poison to float
+  %r51 = uitofp i1 poison to float
+  %r52 = sitofp i1 poison to double
+  %r53 = uitofp i1 poison to double
+  %r54 = sitofp i8 poison to float
+  %r55 = uitofp i8 poison to float
+  %r56 = sitofp i8 poison to double
+  %r57 = uitofp i8 poison to double
+  %r58 = sitofp i16 poison to float
+  %r59 = uitofp i16 poison to float
+  %r60 = sitofp i16 poison to double
+  %r61 = uitofp i16 poison to double
+  %r62 = sitofp i32 poison to float
+  %r63 = uitofp i32 poison to float
+  %r64 = sitofp i32 poison to double
+  %r65 = uitofp i32 poison to double
+  %r66 = sitofp i64 poison to float
+  %r67 = uitofp i64 poison to float
+  %r68 = sitofp i64 poison to double
+  %r69 = uitofp i64 poison to double
+  %r80 = fptrunc double poison to float
+  %r81 = fptrunc <2 x double> poison to <2 x float>
+  %r82 = fptrunc <4 x double> poison to <4 x float>
+  %r83 = fptrunc <8 x double> poison to <8 x float>
+  %r84 = fptrunc <16 x double> poison to <16 x float>
+  %truncf64f16 = fptrunc double poison to half
+  %truncv2f64f16 = fptrunc <2 x double> poison to <2 x half>
+  %truncv4f64f16 = fptrunc <4 x double> poison to <4 x half>
+  %truncv8f64f16 = fptrunc <8 x double> poison to <8 x half>
+  %truncv16f64f16 = fptrunc <16 x double> poison to <16 x half>
+  %truncv32f16 = fptrunc float poison to half
+  %truncv2f32f16 = fptrunc <2 x float> poison to <2 x half>
+  %truncv4f32f16 = fptrunc <4 x float> poison to <4 x half>
+  %truncv8f32f16 = fptrunc <8 x float> poison to <8 x half>
+  %truncv16f32f16 = fptrunc <16 x float> poison to <16 x half>
+  %r85 = fpext float poison to double
+  %r86 = fpext <2 x float> poison to <2 x double>
+  %r87 = fpext <4 x float> poison to <4 x double>
+  %r88 = fpext <8 x float> poison to <8 x double>
+  %r89 = fpext <16 x float> poison to <16 x double>
+  %extf16f32 = fpext half poison to float
+  %extv2f16f32 = fpext <2 x half> poison to <2 x float>
+  %extv4f16f32 = fpext <4 x half> poison to <4 x float>
+  %extv8f16f32 = fpext <8 x half> poison to <8 x float>
+  %extv16f16f32 = fpext <16 x half> poison to <16 x float>
+  %extf16f64 = fpext half poison to double
+  %extv2f16f64 = fpext <2 x half> poison to <2 x double>
+  %extv4f16f64 = fpext <4 x half> poison to <4 x double>
+  %extv8f16f64 = fpext <8 x half> poison to <8 x double>
+  %extv16f16f64 = fpext <16 x half> poison to <16 x double>
+  %r90 = fptoui <2 x float> poison to <2 x i1>
+  %r91 = fptosi <2 x float> poison to <2 x i1>
+  %r92 = fptoui <2 x float> poison to <2 x i8>
+  %r93 = fptosi <2 x float> poison to <2 x i8>
+  %r94 = fptoui <2 x float> poison to <2 x i16>
+  %r95 = fptosi <2 x float> poison to <2 x i16>
+  %r96 = fptoui <2 x float> poison to <2 x i32>
+  %r97 = fptosi <2 x float> poison to <2 x i32>
+  %r98 = fptoui <2 x float> poison to <2 x i64>
+  %r99 = fptosi <2 x float> poison to <2 x i64>
+  %r100 = fptoui <2 x double> poison to <2 x i1>
+  %r101 = fptosi <2 x double> poison to <2 x i1>
+  %r102 = fptoui <2 x double> poison to <2 x i8>
+  %r103 = fptosi <2 x double> poison to <2 x i8>
+  %r104 = fptoui <2 x double> poison to <2 x i16>
+  %r105 = fptosi <2 x double> poison to <2 x i16>
+  %r106 = fptoui <2 x double> poison to <2 x i32>
+  %r107 = fptosi <2 x double> poison to <2 x i32>
+  %r108 = fptoui <2 x double> poison to <2 x i64>
+  %r109 = fptosi <2 x double> poison to <2 x i64>
 
-  %r110 = fptoui <4 x float> undef to <4 x i1>
-  %r111 = fptosi <4 x float> undef to <4 x i1>
-  %r112 = fptoui <4 x float> undef to <4 x i8>
-  %r113 = fptosi <4 x float> undef to <4 x i8>
-  %r114 = fptoui <4 x float> undef to <4 x i16>
-  %r115 = fptosi <4 x float> undef to <4 x i16>
-  %r116 = fptoui <4 x float> undef to <4 x i32>
-  %r117 = fptosi <4 x float> undef to <4 x i32>
-  %r118 = fptoui <4 x float> undef to <4 x i64>
-  %r119 = fptosi <4 x float> undef to <4 x i64>
+  %r110 = fptoui <4 x float> poison to <4 x i1>
+  %r111 = fptosi <4 x float> poison to <4 x i1>
+  %r112 = fptoui <4 x float> poison to <4 x i8>
+  %r113 = fptosi <4 x float> poison to <4 x i8>
+  %r114 = fptoui <4 x float> poison to <4 x i16>
+  %r115 = fptosi <4 x float> poison to <4 x i16>
+  %r116 = fptoui <4 x float> poison to <4 x i32>
+  %r117 = fptosi <4 x float> poison to <4 x i32>
+  %r118 = fptoui <4 x float> poison to <4 x i64>
+  %r119 = fptosi <4 x float> poison to <4 x i64>
 
-  %r120 = fptoui <4 x double> undef to <4 x i1>
-  %r121 = fptosi <4 x double> undef to <4 x i1>
-  %r122 = fptoui <4 x double> undef to <4 x i8>
-  %r123 = fptosi <4 x double> undef to <4 x i8>
-  %r124 = fptoui <4 x double> undef to <4 x i16>
-  %r125 = fptosi <4 x double> undef to <4 x i16>
-  %r126 = fptoui <4 x double> undef to <4 x i32>
-  %r127 = fptosi <4 x double> undef to <4 x i32>
-  %r128 = fptoui <4 x double> undef to <4 x i64>
-  %r129 = fptosi <4 x double> undef to <4 x i64>
+  %r120 = fptoui <4 x double> poison to <4 x i1>
+  %r121 = fptosi <4 x double> poison to <4 x i1>
+  %r122 = fptoui <4 x double> poison to <4 x i8>
+  %r123 = fptosi <4 x double> poison to <4 x i8>
+  %r124 = fptoui <4 x double> poison to <4 x i16>
+  %r125 = fptosi <4 x double> poison to <4 x i16>
+  %r126 = fptoui <4 x double> poison to <4 x i32>
+  %r127 = fptosi <4 x double> poison to <4 x i32>
+  %r128 = fptoui <4 x double> poison to <4 x i64>
+  %r129 = fptosi <4 x double> poison to <4 x i64>
 
-  %r130 = fptoui <8 x float> undef to <8 x i1>
-  %r131 = fptosi <8 x float> undef to <8 x i1>
-  %r132 = fptoui <8 x float> undef to <8 x i8>
-  %r133 = fptosi <8 x float> undef to <8 x i8>
-  %r134 = fptoui <8 x float> undef to <8 x i16>
-  %r135 = fptosi <8 x float> undef to <8 x i16>
-  %r136 = fptoui <8 x float> undef to <8 x i32>
-  %r137 = fptosi <8 x float> undef to <8 x i32>
-  %r138 = fptoui <8 x float> undef to <8 x i64>
-  %r139 = fptosi <8 x float> undef to <8 x i64>
+  %r130 = fptoui <8 x float> poison to <8 x i1>
+  %r131 = fptosi <8 x float> poison to <8 x i1>
+  %r132 = fptoui <8 x float> poison to <8 x i8>
+  %r133 = fptosi <8 x float> poison to <8 x i8>
+  %r134 = fptoui <8 x float> poison to <8 x i16>
+  %r135 = fptosi <8 x float> poison to <8 x i16>
+  %r136 = fptoui <8 x float> poison to <8 x i32>
+  %r137 = fptosi <8 x float> poison to <8 x i32>
+  %r138 = fptoui <8 x float> poison to <8 x i64>
+  %r139 = fptosi <8 x float> poison to <8 x i64>
 
-  %r140 = fptoui <8 x double> undef to <8 x i1>
-  %r141 = fptosi <8 x double> undef to <8 x i1>
-  %r142 = fptoui <8 x double> undef to <8 x i8>
-  %r143 = fptosi <8 x double> undef to <8 x i8>
-  %r144 = fptoui <8 x double> undef to <8 x i16>
-  %r145 = fptosi <8 x double> undef to <8 x i16>
-  %r146 = fptoui <8 x double> undef to <8 x i32>
-  %r147 = fptosi <8 x double> undef to <8 x i32>
-  %r148 = fptoui <8 x double> undef to <8 x i64>
-  %r149 = fptosi <8 x double> undef to <8 x i64>
+  %r140 = fptoui <8 x double> poison to <8 x i1>
+  %r141 = fptosi <8 x double> poison to <8 x i1>
+  %r142 = fptoui <8 x double> poison to <8 x i8>
+  %r143 = fptosi <8 x double> poison to <8 x i8>
+  %r144 = fptoui <8 x double> poison to <8 x i16>
+  %r145 = fptosi <8 x double> poison to <8 x i16>
+  %r146 = fptoui <8 x double> poison to <8 x i32>
+  %r147 = fptosi <8 x double> poison to <8 x i32>
+  %r148 = fptoui <8 x double> poison to <8 x i64>
+  %r149 = fptosi <8 x double> poison to <8 x i64>
 
-  %r150 = fptoui <16 x float> undef to <16 x i1>
-  %r151 = fptosi <16 x float> undef to <16 x i1>
-  %r152 = fptoui <16 x float> undef to <16 x i8>
-  %r153 = fptosi <16 x float> undef to <16 x i8>
-  %r154 = fptoui <16 x float> undef to <16 x i16>
-  %r155 = fptosi <16 x float> undef to <16 x i16>
-  %r156 = fptoui <16 x float> undef to <16 x i32>
-  %r157 = fptosi <16 x float> undef to <16 x i32>
-  %r158 = fptoui <16 x float> undef to <16 x i64>
-  %r159 = fptosi <16 x float> undef to <16 x i64>
+  %r150 = fptoui <16 x float> poison to <16 x i1>
+  %r151 = fptosi <16 x float> poison to <16 x i1>
+  %r152 = fptoui <16 x float> poison to <16 x i8>
+  %r153 = fptosi <16 x float> poison to <16 x i8>
+  %r154 = fptoui <16 x float> poison to <16 x i16>
+  %r155 = fptosi <16 x float> poison to <16 x i16>
+  %r156 = fptoui <16 x float> poison to <16 x i32>
+  %r157 = fptosi <16 x float> poison to <16 x i32>
+  %r158 = fptoui <16 x float> poison to <16 x i64>
+  %r159 = fptosi <16 x float> poison to <16 x i64>
 
-  %r160 = fptoui <16 x double> undef to <16 x i1>
-  %r161 = fptosi <16 x double> undef to <16 x i1>
-  %r162 = fptoui <16 x double> undef to <16 x i8>
-  %r163 = fptosi <16 x double> undef to <16 x i8>
-  %r164 = fptoui <16 x double> undef to <16 x i16>
-  %r165 = fptosi <16 x double> undef to <16 x i16>
-  %r166 = fptoui <16 x double> undef to <16 x i32>
-  %r167 = fptosi <16 x double> undef to <16 x i32>
-  %r168 = fptoui <16 x double> undef to <16 x i64>
-  %r169 = fptosi <16 x double> undef to <16 x i64>
+  %r160 = fptoui <16 x double> poison to <16 x i1>
+  %r161 = fptosi <16 x double> poison to <16 x i1>
+  %r162 = fptoui <16 x double> poison to <16 x i8>
+  %r163 = fptosi <16 x double> poison to <16 x i8>
+  %r164 = fptoui <16 x double> poison to <16 x i16>
+  %r165 = fptosi <16 x double> poison to <16 x i16>
+  %r166 = fptoui <16 x double> poison to <16 x i32>
+  %r167 = fptosi <16 x double> poison to <16 x i32>
+  %r168 = fptoui <16 x double> poison to <16 x i64>
+  %r169 = fptosi <16 x double> poison to <16 x i64>
 
-  %r170 = uitofp <2 x i1> undef to <2 x float>
-  %r171 = sitofp <2 x i1> undef to <2 x float>
-  %r172 = uitofp <2 x i8> undef to <2 x float>
-  %r173 = sitofp <2 x i8> undef to <2 x float>
-  %r174 = uitofp <2 x i16> undef to <2 x float>
-  %r175 = sitofp <2 x i16> undef to <2 x float>
-  %r176 = uitofp <2 x i32> undef to <2 x float>
-  %r177 = sitofp <2 x i32> undef to <2 x float>
-  %r178 = uitofp <2 x i64> undef to <2 x float>
-  %r179 = sitofp <2 x i64> undef to <2 x float>
+  %r170 = uitofp <2 x i1> poison to <2 x float>
+  %r171 = sitofp <2 x i1> poison to <2 x float>
+  %r172 = uitofp <2 x i8> poison to <2 x float>
+  %r173 = sitofp <2 x i8> poison to <2 x float>
+  %r174 = uitofp <2 x i16> poison to <2 x float>
+  %r175 = sitofp <2 x i16> poison to <2 x float>
+  %r176 = uitofp <2 x i32> poison to <2 x float>
+  %r177 = sitofp <2 x i32> poison to <2 x float>
+  %r178 = uitofp <2 x i64> poison to <2 x float>
+  %r179 = sitofp <2 x i64> poison to <2 x float>
 
-  %r180 = uitofp <2 x i1> undef to <2 x double>
-  %r181 = sitofp <2 x i1> undef to <2 x double>
-  %r182 = uitofp <2 x i8> undef to <2 x double>
-  %r183 = sitofp <2 x i8> undef to <2 x double>
-  %r184 = uitofp <2 x i16> undef to <2 x double>
-  %r185 = sitofp <2 x i16> undef to <2 x double>
-  %r186 = uitofp <2 x i32> undef to <2 x double>
-  %r187 = sitofp <2 x i32> undef to <2 x double>
-  %r188 = uitofp <2 x i64> undef to <2 x double>
-  %r189 = sitofp <2 x i64> undef to <2 x double>
+  %r180 = uitofp <2 x i1> poison to <2 x double>
+  %r181 = sitofp <2 x i1> poison to <2 x double>
+  %r182 = uitofp <2 x i8> poison to <2 x double>
+  %r183 = sitofp <2 x i8> poison to <2 x double>
+  %r184 = uitofp <2 x i16> poison to <2 x double>
+  %r185 = sitofp <2 x i16> poison to <2 x double>
+  %r186 = uitofp <2 x i32> poison to <2 x double>
+  %r187 = sitofp <2 x i32> poison to <2 x double>
+  %r188 = uitofp <2 x i64> poison to <2 x double>
+  %r189 = sitofp <2 x i64> poison to <2 x double>
 
-  %r190 = uitofp <4 x i1> undef to <4 x float>
-  %r191 = sitofp <4 x i1> undef to <4 x float>
-  %r192 = uitofp <4 x i8> undef to <4 x float>
-  %r193 = sitofp <4 x i8> undef to <4 x float>
-  %r194 = uitofp <4 x i16> undef to <4 x float>
-  %r195 = sitofp <4 x i16> undef to <4 x float>
-  %r196 = uitofp <4 x i32> undef to <4 x float>
-  %r197 = sitofp <4 x i32> undef to <4 x float>
-  %r198 = uitofp <4 x i64> undef to <4 x float>
-  %r199 = sitofp <4 x i64> undef to <4 x float>
+  %r190 = uitofp <4 x i1> poison to <4 x float>
+  %r191 = sitofp <4 x i1> poison to <4 x float>
+  %r192 = uitofp <4 x i8> poison to <4 x float>
+  %r193 = sitofp <4 x i8> poison to <4 x float>
+  %r194 = uitofp <4 x i16> poison to <4 x float>
+  %r195 = sitofp <4 x i16> poison to <4 x float>
+  %r196 = uitofp <4 x i32> poison to <4 x float>
+  %r197 = sitofp <4 x i32> poison to <4 x float>
+  %r198 = uitofp <4 x i64> poison to <4 x float>
+  %r199 = sitofp <4 x i64> poison to <4 x float>
 
-  %r200 = uitofp <4 x i1> undef to <4 x double>
-  %r201 = sitofp <4 x i1> undef to <4 x double>
-  %r202 = uitofp <4 x i8> undef to <4 x double>
-  %r203 = sitofp <4 x i8> undef to <4 x double>
-  %r204 = uitofp <4 x i16> undef to <4 x double>
-  %r205 = sitofp <4 x i16> undef to <4 x double>
-  %r206 = uitofp <4 x i32> undef to <4 x double>
-  %r207 = sitofp <4 x i32> undef to <4 x double>
-  %r208 = uitofp <4 x i64> undef to <4 x double>
-  %r209 = sitofp <4 x i64> undef to <4 x double>
+  %r200 = uitofp <4 x i1> poison to <4 x double>
+  %r201 = sitofp <4 x i1> poison to <4 x double>
+  %r202 = uitofp <4 x i8> poison to <4 x double>
+  %r203 = sitofp <4 x i8> poison to <4 x double>
+  %r204 = uitofp <4 x i16> poison to <4 x double>
+  %r205 = sitofp <4 x i16> poison to <4 x double>
+  %r206 = uitofp <4 x i32> poison to <4 x double>
+  %r207 = sitofp <4 x i32> poison to <4 x double>
+  %r208 = uitofp <4 x i64> poison to <4 x double>
+  %r209 = sitofp <4 x i64> poison to <4 x double>
 
-  %r210 = uitofp <8 x i1> undef to <8 x float>
-  %r211 = sitofp <8 x i1> undef to <8 x float>
-  %r212 = uitofp <8 x i8> undef to <8 x float>
-  %r213 = sitofp <8 x i8> undef to <8 x float>
-  %r214 = uitofp <8 x i16> undef to <8 x float>
-  %r215 = sitofp <8 x i16> undef to <8 x float>
-  %r216 = uitofp <8 x i32> undef to <8 x float>
-  %r217 = sitofp <8 x i32> undef to <8 x float>
-  %r218 = uitofp <8 x i64> undef to <8 x float>
-  %r219 = sitofp <8 x i64> undef to <8 x float>
+  %r210 = uitofp <8 x i1> poison to <8 x float>
+  %r211 = sitofp <8 x i1> poison to <8 x float>
+  %r212 = uitofp <8 x i8> poison to <8 x float>
+  %r213 = sitofp <8 x i8> poison to <8 x float>
+  %r214 = uitofp <8 x i16> poison to <8 x float>
+  %r215 = sitofp <8 x i16> poison to <8 x float>
+  %r216 = uitofp <8 x i32> poison to <8 x float>
+  %r217 = sitofp <8 x i32> poison to <8 x float>
+  %r218 = uitofp <8 x i64> poison to <8 x float>
+  %r219 = sitofp <8 x i64> poison to <8 x float>
 
-  %r220 = uitofp <8 x i1> undef to <8 x double>
-  %r221 = sitofp <8 x i1> undef to <8 x double>
-  %r222 = uitofp <8 x i8> undef to <8 x double>
-  %r223 = sitofp <8 x i8> undef to <8 x double>
-  %r224 = uitofp <8 x i16> undef to <8 x double>
-  %r225 = sitofp <8 x i16> undef to <8 x double>
-  %r226 = uitofp <8 x i16> undef to <8 x double>
-  %r227 = sitofp <8 x i16> undef to <8 x double>
-  %r228 = uitofp <8 x i64> undef to <8 x double>
-  %r229 = sitofp <8 x i64> undef to <8 x double>
+  %r220 = uitofp <8 x i1> poison to <8 x double>
+  %r221 = sitofp <8 x i1> poison to <8 x double>
+  %r222 = uitofp <8 x i8> poison to <8 x double>
+  %r223 = sitofp <8 x i8> poison to <8 x double>
+  %r224 = uitofp <8 x i16> poison to <8 x double>
+  %r225 = sitofp <8 x i16> poison to <8 x double>
+  %r226 = uitofp <8 x i16> poison to <8 x double>
+  %r227 = sitofp <8 x i16> poison to <8 x double>
+  %r228 = uitofp <8 x i64> poison to <8 x double>
+  %r229 = sitofp <8 x i64> poison to <8 x double>
 
-  %r230 = uitofp <16 x i1> undef to <16 x float>
-  %r231 = sitofp <16 x i1> undef to <16 x float>
-  %r232 = uitofp <16 x i8> undef to <16 x float>
-  %r233 = sitofp <16 x i8> undef to <16 x float>
-  %r234 = uitofp <16 x i16> undef to <16 x float>
-  %r235 = sitofp <16 x i16> undef to <16 x float>
-  %r236 = uitofp <16 x i32> undef to <16 x float>
-  %r237 = sitofp <16 x i32> undef to <16 x float>
-  %r238 = uitofp <16 x i64> undef to <16 x float>
-  %r239 = sitofp <16 x i64> undef to <16 x float>
+  %r230 = uitofp <16 x i1> poison to <16 x float>
+  %r231 = sitofp <16 x i1> poison to <16 x float>
+  %r232 = uitofp <16 x i8> poison to <16 x float>
+  %r233 = sitofp <16 x i8> poison to <16 x float>
+  %r234 = uitofp <16 x i16> poison to <16 x float>
+  %r235 = sitofp <16 x i16> poison to <16 x float>
+  %r236 = uitofp <16 x i32> poison to <16 x float>
+  %r237 = sitofp <16 x i32> poison to <16 x float>
+  %r238 = uitofp <16 x i64> poison to <16 x float>
+  %r239 = sitofp <16 x i64> poison to <16 x float>
 
-  %r240 = uitofp <16 x i1> undef to <16 x double>
-  %r241 = sitofp <16 x i1> undef to <16 x double>
-  %r242 = uitofp <16 x i8> undef to <16 x double>
-  %r243 = sitofp <16 x i8> undef to <16 x double>
-  %r244 = uitofp <16 x i16> undef to <16 x double>
-  %r245 = sitofp <16 x i16> undef to <16 x double>
-  %r246 = uitofp <16 x i16> undef to <16 x double>
-  %r247 = sitofp <16 x i16> undef to <16 x double>
-  %r248 = uitofp <16 x i64> undef to <16 x double>
-  %r249 = sitofp <16 x i64> undef to <16 x double>
+  %r240 = uitofp <16 x i1> poison to <16 x double>
+  %r241 = sitofp <16 x i1> poison to <16 x double>
+  %r242 = uitofp <16 x i8> poison to <16 x double>
+  %r243 = sitofp <16 x i8> poison to <16 x double>
+  %r244 = uitofp <16 x i16> poison to <16 x double>
+  %r245 = sitofp <16 x i16> poison to <16 x double>
+  %r246 = uitofp <16 x i16> poison to <16 x double>
+  %r247 = sitofp <16 x i16> poison to <16 x double>
+  %r248 = uitofp <16 x i64> poison to <16 x double>
+  %r249 = sitofp <16 x i64> poison to <16 x double>
 
   ret i32 undef
 }
@@ -836,24 +836,24 @@ define i32 @casts_with_users(i8 %a, i16 %b, i32 %c, i64 %d, i1 %e) {
 
 define i32 @bitcasts() {
 ; CHECK-LABEL: 'bitcasts'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %a = bitcast i32 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %b = bitcast float undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c = bitcast i32 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %d = bitcast float undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %e = bitcast i64 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f = bitcast double undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %g = bitcast half undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %h = bitcast i16 undef to half
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %a = bitcast i32 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %b = bitcast float poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c = bitcast i32 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %d = bitcast float poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %e = bitcast i64 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f = bitcast double poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %g = bitcast half poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %h = bitcast i16 poison to half
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-  %a = bitcast i32 undef to i32
-  %b = bitcast float undef to float
-  %c = bitcast i32 undef to float
-  %d = bitcast float undef to i32
-  %e = bitcast i64 undef to double
-  %f = bitcast double undef to i64
-  %g = bitcast half undef to i16
-  %h = bitcast i16 undef to half
+  %a = bitcast i32 poison to i32
+  %b = bitcast float poison to float
+  %c = bitcast i32 poison to float
+  %d = bitcast float poison to i32
+  %e = bitcast i64 poison to double
+  %f = bitcast double poison to i64
+  %g = bitcast half poison to i16
+  %h = bitcast i16 poison to half
   ret i32 undef
 }
 
@@ -941,31 +941,31 @@ define i32 @load_extends() {
 
 define i32 @store_truncs() {
 ; CHECK-LABEL: 'store_truncs'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r0 = trunc i64 undef to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r0 = trunc i64 poison to i8
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i8 %r0, ptr undef, align 1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r1 = trunc i64 undef to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r1 = trunc i64 poison to i16
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i16 %r1, ptr undef, align 2
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r2 = trunc i64 undef to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r2 = trunc i64 poison to i32
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i32 %r2, ptr undef, align 4
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r3 = trunc i32 undef to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r3 = trunc i32 poison to i8
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i8 %r3, ptr undef, align 1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r4 = trunc i32 undef to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r4 = trunc i32 poison to i16
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i16 %r4, ptr undef, align 2
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r5 = trunc i16 undef to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r5 = trunc i16 poison to i8
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i8 %r5, ptr undef, align 1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-  %r0 = trunc i64 undef to i8
+  %r0 = trunc i64 poison to i8
   store i8 %r0, ptr undef
-  %r1 = trunc i64 undef to i16
+  %r1 = trunc i64 poison to i16
   store i16 %r1, ptr undef
-  %r2 = trunc i64 undef to i32
+  %r2 = trunc i64 poison to i32
   store i32 %r2, ptr undef
-  %r3 = trunc i32 undef to i8
+  %r3 = trunc i32 poison to i8
   store i8 %r3, ptr undef
-  %r4 = trunc i32 undef to i16
+  %r4 = trunc i32 poison to i16
   store i16 %r4, ptr undef
-  %r5 = trunc i16 undef to i8
+  %r5 = trunc i16 poison to i8
   store i8 %r5, ptr undef
   ret i32 undef
 }
@@ -1013,296 +1013,296 @@ declare void @use(i16, i16, i32, i32, i64, i64, i32, i32, i64, i64, i64, i64)
 
 define void @fp16cast() {
 ; CHECK-NOFP16-LABEL: 'fp16cast'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half undef to i1
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half undef to i1
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half undef to i8
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half undef to i8
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half undef to i16
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half undef to i16
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half undef to i32
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half undef to i32
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half undef to i64
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half undef to i64
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> undef to <2 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> undef to <2 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> undef to <2 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> undef to <2 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> undef to <2 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> undef to <2 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> undef to <2 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> undef to <2 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> undef to <2 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> undef to <2 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> undef to <4 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> undef to <4 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> undef to <4 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> undef to <4 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> undef to <4 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> undef to <4 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> undef to <4 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> undef to <4 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> undef to <4 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> undef to <4 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> undef to <8 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> undef to <8 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> undef to <8 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> undef to <8 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> undef to <8 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> undef to <8 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> undef to <8 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> undef to <8 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> undef to <8 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> undef to <8 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> undef to <16 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> undef to <16 x i1>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> undef to <16 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> undef to <16 x i8>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> undef to <16 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> undef to <16 x i16>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> undef to <16 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> undef to <16 x i32>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> undef to <16 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> undef to <16 x i64>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r252 = uitofp <8 x i8> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r253 = sitofp <8 x i8> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> undef to <8 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> undef to <16 x half>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> undef to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half poison to i1
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half poison to i1
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half poison to i8
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half poison to i8
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half poison to i16
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half poison to i16
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half poison to i32
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half poison to i32
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half poison to i64
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half poison to i64
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> poison to <2 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> poison to <2 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> poison to <2 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> poison to <2 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> poison to <2 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:82 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r252 = uitofp <8 x i8> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r253 = sitofp <8 x i8> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half>
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fp16cast'
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half undef to i1
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half undef to i1
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half undef to i8
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half undef to i8
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half undef to i16
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half undef to i16
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half undef to i32
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half undef to i32
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half undef to i64
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half undef to i64
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> undef to <2 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> undef to <2 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> undef to <2 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> undef to <2 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> undef to <2 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> undef to <2 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> undef to <2 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> undef to <2 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> undef to <2 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> undef to <2 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> undef to <4 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> undef to <4 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> undef to <4 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> undef to <4 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> undef to <4 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> undef to <4 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> undef to <4 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> undef to <4 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> undef to <4 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> undef to <4 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> undef to <8 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> undef to <8 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> undef to <8 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> undef to <8 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> undef to <8 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> undef to <8 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> undef to <8 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> undef to <8 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> undef to <8 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> undef to <8 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> undef to <16 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> undef to <16 x i1>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> undef to <16 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> undef to <16 x i8>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> undef to <16 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> undef to <16 x i16>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> undef to <16 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> undef to <16 x i32>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> undef to <16 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> undef to <16 x i64>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> undef to <8 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> undef to <16 x half>
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> undef to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half poison to i1
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half poison to i1
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half poison to i8
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half poison to i8
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half poison to i16
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half poison to i16
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half poison to i32
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half poison to i32
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half poison to i64
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half poison to i64
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> poison to <2 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> poison to <2 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> poison to <2 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> poison to <2 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> poison to <2 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half>
+; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half>
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %r30 = fptoui half undef to i1
-  %r31 = fptosi half undef to i1
-  %r32 = fptoui half undef to i8
-  %r33 = fptosi half undef to i8
-  %r34 = fptoui half undef to i16
-  %r35 = fptosi half undef to i16
-  %r36 = fptoui half undef to i32
-  %r37 = fptosi half undef to i32
-  %r38 = fptoui half undef to i64
-  %r39 = fptosi half undef to i64
+  %r30 = fptoui half poison to i1
+  %r31 = fptosi half poison to i1
+  %r32 = fptoui half poison to i8
+  %r33 = fptosi half poison to i8
+  %r34 = fptoui half poison to i16
+  %r35 = fptosi half poison to i16
+  %r36 = fptoui half poison to i32
+  %r37 = fptosi half poison to i32
+  %r38 = fptoui half poison to i64
+  %r39 = fptosi half poison to i64
 
-  %r90 = fptoui <2 x half> undef to <2 x i1>
-  %r91 = fptosi <2 x half> undef to <2 x i1>
-  %r92 = fptoui <2 x half> undef to <2 x i8>
-  %r93 = fptosi <2 x half> undef to <2 x i8>
-  %r94 = fptoui <2 x half> undef to <2 x i16>
-  %r95 = fptosi <2 x half> undef to <2 x i16>
-  %r96 = fptoui <2 x half> undef to <2 x i32>
-  %r97 = fptosi <2 x half> undef to <2 x i32>
-  %r98 = fptoui <2 x half> undef to <2 x i64>
-  %r99 = fptosi <2 x half> undef to <2 x i64>
+  %r90 = fptoui <2 x half> poison to <2 x i1>
+  %r91 = fptosi <2 x half> poison to <2 x i1>
+  %r92 = fptoui <2 x half> poison to <2 x i8>
+  %r93 = fptosi <2 x half> poison to <2 x i8>
+  %r94 = fptoui <2 x half> poison to <2 x i16>
+  %r95 = fptosi <2 x half> poison to <2 x i16>
+  %r96 = fptoui <2 x half> poison to <2 x i32>
+  %r97 = fptosi <2 x half> poison to <2 x i32>
+  %r98 = fptoui <2 x half> poison to <2 x i64>
+  %r99 = fptosi <2 x half> poison to <2 x i64>
 
-  %r110 = fptoui <4 x half> undef to <4 x i1>
-  %r111 = fptosi <4 x half> undef to <4 x i1>
-  %r112 = fptoui <4 x half> undef to <4 x i8>
-  %r113 = fptosi <4 x half> undef to <4 x i8>
-  %r114 = fptoui <4 x half> undef to <4 x i16>
-  %r115 = fptosi <4 x half> undef to <4 x i16>
-  %r116 = fptoui <4 x half> undef to <4 x i32>
-  %r117 = fptosi <4 x half> undef to <4 x i32>
-  %r118 = fptoui <4 x half> undef to <4 x i64>
-  %r119 = fptosi <4 x half> undef to <4 x i64>
+  %r110 = fptoui <4 x half> poison to <4 x i1>
+  %r111 = fptosi <4 x half> poison to <4 x i1>
+  %r112 = fptoui <4 x half> poison to <4 x i8>
+  %r113 = fptosi <4 x half> poison to <4 x i8>
+  %r114 = fptoui <4 x half> poison to <4 x i16>
+  %r115 = fptosi <4 x half> poison to <4 x i16>
+  %r116 = fptoui <4 x half> poison to <4 x i32>
+  %r117 = fptosi <4 x half> poison to <4 x i32>
+  %r118 = fptoui <4 x half> poison to <4 x i64>
+  %r119 = fptosi <4 x half> poison to <4 x i64>
 
-  %r130 = fptoui <8 x half> undef to <8 x i1>
-  %r131 = fptosi <8 x half> undef to <8 x i1>
-  %r132 = fptoui <8 x half> undef to <8 x i8>
-  %r133 = fptosi <8 x half> undef to <8 x i8>
-  %r134 = fptoui <8 x half> undef to <8 x i16>
-  %r135 = fptosi <8 x half> undef to <8 x i16>
-  %r136 = fptoui <8 x half> undef to <8 x i32>
-  %r137 = fptosi <8 x half> undef to <8 x i32>
-  %r138 = fptoui <8 x half> undef to <8 x i64>
-  %r139 = fptosi <8 x half> undef to <8 x i64>
+  %r130 = fptoui <8 x half> poison to <8 x i1>
+  %r131 = fptosi <8 x half> poison to <8 x i1>
+  %r132 = fptoui <8 x half> poison to <8 x i8>
+  %r133 = fptosi <8 x half> poison to <8 x i8>
+  %r134 = fptoui <8 x half> poison to <8 x i16>
+  %r135 = fptosi <8 x half> poison to <8 x i16>
+  %r136 = fptoui <8 x half> poison to <8 x i32>
+  %r137 = fptosi <8 x half> poison to <8 x i32>
+  %r138 = fptoui <8 x half> poison to <8 x i64>
+  %r139 = fptosi <8 x half> poison to <8 x i64>
 
-  %r150 = fptoui <16 x half> undef to <16 x i1>
-  %r151 = fptosi <16 x half> undef to <16 x i1>
-  %r152 = fptoui <16 x half> undef to <16 x i8>
-  %r153 = fptosi <16 x half> undef to <16 x i8>
-  %r154 = fptoui <16 x half> undef to <16 x i16>
-  %r155 = fptosi <16 x half> undef to <16 x i16>
-  %r156 = fptoui <16 x half> undef to <16 x i32>
-  %r157 = fptosi <16 x half> undef to <16 x i32>
-  %r158 = fptoui <16 x half> undef to <16 x i64>
-  %r159 = fptosi <16 x half> undef to <16 x i64>
+  %r150 = fptoui <16 x half> poison to <16 x i1>
+  %r151 = fptosi <16 x half> poison to <16 x i1>
+  %r152 = fptoui <16 x half> poison to <16 x i8>
+  %r153 = fptosi <16 x half> poison to <16 x i8>
+  %r154 = fptoui <16 x half> poison to <16 x i16>
+  %r155 = fptosi <16 x half> poison to <16 x i16>
+  %r156 = fptoui <16 x half> poison to <16 x i32>
+  %r157 = fptosi <16 x half> poison to <16 x i32>
+  %r158 = fptoui <16 x half> poison to <16 x i64>
+  %r159 = fptosi <16 x half> poison to <16 x i64>
 
-  %r250 = uitofp <8 x i1> undef to <8 x half>
-  %r251 = sitofp <8 x i1> undef to <8 x half>
-  %r252 = uitofp <8 x i8> undef to <8 x half>
-  %r253 = sitofp <8 x i8> undef to <8 x half>
-  %r254 = uitofp <8 x i16> undef to <8 x half>
-  %r255 = sitofp <8 x i16> undef to <8 x half>
-  %r256 = uitofp <8 x i32> undef to <8 x half>
-  %r257 = sitofp <8 x i32> undef to <8 x half>
-  %r258 = uitofp <8 x i64> undef to <8 x half>
-  %r259 = sitofp <8 x i64> undef to <8 x half>
+  %r250 = uitofp <8 x i1> poison to <8 x half>
+  %r251 = sitofp <8 x i1> poison to <8 x half>
+  %r252 = uitofp <8 x i8> poison to <8 x half>
+  %r253 = sitofp <8 x i8> poison to <8 x half>
+  %r254 = uitofp <8 x i16> poison to <8 x half>
+  %r255 = sitofp <8 x i16> poison to <8 x half>
+  %r256 = uitofp <8 x i32> poison to <8 x half>
+  %r257 = sitofp <8 x i32> poison to <8 x half>
+  %r258 = uitofp <8 x i64> poison to <8 x half>
+  %r259 = sitofp <8 x i64> poison to <8 x half>
 
-  %r260 = uitofp <16 x i1> undef to <16 x half>
-  %r261 = sitofp <16 x i1> undef to <16 x half>
-  %r262 = uitofp <16 x i8> undef to <16 x half>
-  %r263 = sitofp <16 x i8> undef to <16 x half>
-  %r264 = uitofp <16 x i16> undef to <16 x half>
-  %r265 = sitofp <16 x i16> undef to <16 x half>
-  %r266 = uitofp <16 x i32> undef to <16 x half>
-  %r267 = sitofp <16 x i32> undef to <16 x half>
-  %r268 = uitofp <16 x i64> undef to <16 x half>
-  %r269 = sitofp <16 x i64> undef to <16 x half>
+  %r260 = uitofp <16 x i1> poison to <16 x half>
+  %r261 = sitofp <16 x i1> poison to <16 x half>
+  %r262 = uitofp <16 x i8> poison to <16 x half>
+  %r263 = sitofp <16 x i8> poison to <16 x half>
+  %r264 = uitofp <16 x i16> poison to <16 x half>
+  %r265 = sitofp <16 x i16> poison to <16 x half>
+  %r266 = uitofp <16 x i32> poison to <16 x half>
+  %r267 = sitofp <16 x i32> poison to <16 x half>
+  %r268 = uitofp <16 x i64> poison to <16 x half>
+  %r269 = sitofp <16 x i64> poison to <16 x half>
   ret void
 }
 
 define void @bf16cast() {
 ; CHECK-NOFP16-LABEL: 'bf16cast'
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %extf16f32 = fpext bfloat undef to float
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extf16f64 = fpext bfloat undef to double
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncf16f32 = fptrunc float undef to bfloat
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %truncf16f64 = fptrunc double undef to bfloat
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
-; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %extf16f32 = fpext bfloat poison to float
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x bfloat> poison to <2 x float>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x bfloat> poison to <4 x float>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extf16f64 = fpext bfloat poison to double
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncf16f32 = fptrunc float poison to bfloat
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f32 = fptrunc <4 x float> poison to <4 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:30 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %truncf16f64 = fptrunc double poison to bfloat
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat>
+; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat>
 ; CHECK-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'bf16cast'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %extf16f32 = fpext bfloat undef to float
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extf16f64 = fpext bfloat undef to double
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %truncf16f32 = fptrunc float undef to bfloat
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %truncf16f64 = fptrunc double undef to bfloat
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %extf16f32 = fpext bfloat poison to float
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %extv2f16f32 = fpext <2 x bfloat> poison to <2 x float>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %extv4f16f32 = fpext <4 x bfloat> poison to <4 x float>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extf16f64 = fpext bfloat poison to double
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %truncf16f32 = fptrunc float poison to bfloat
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %truncv4f16f32 = fptrunc <4 x float> poison to <4 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %truncf16f64 = fptrunc double poison to bfloat
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat>
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %extf16f32 = fpext bfloat undef to float
-  %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
-  %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
-  %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
-  %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
-  %extf16f64 = fpext bfloat undef to double
-  %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
-  %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
-  %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
-  %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
-  %truncf16f32 = fptrunc float undef to bfloat
-  %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
-  %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
-  %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
-  %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
-  %truncf16f64 = fptrunc double undef to bfloat
-  %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
-  %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
-  %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
-  %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
+  %extf16f32 = fpext bfloat poison to float
+  %extv2f16f32 = fpext <2 x bfloat> poison to <2 x float>
+  %extv4f16f32 = fpext <4 x bfloat> poison to <4 x float>
+  %extv8f16f32 = fpext <8 x bfloat> poison to <8 x float>
+  %extv16f16f32 = fpext <16 x bfloat> poison to <16 x float>
+  %extf16f64 = fpext bfloat poison to double
+  %extv2f16f64 = fpext <2 x bfloat> poison to <2 x double>
+  %extv4f16f64 = fpext <4 x bfloat> poison to <4 x double>
+  %extv8f16f64 = fpext <8 x bfloat> poison to <8 x double>
+  %extv16f16f64 = fpext <16 x bfloat> poison to <16 x double>
+  %truncf16f32 = fptrunc float poison to bfloat
+  %truncv2f16f32 = fptrunc <2 x float> poison to <2 x bfloat>
+  %truncv4f16f32 = fptrunc <4 x float> poison to <4 x bfloat>
+  %truncv8f16f32 = fptrunc <8 x float> poison to <8 x bfloat>
+  %truncv16f16f32 = fptrunc <16 x float> poison to <16 x bfloat>
+  %truncf16f64 = fptrunc double poison to bfloat
+  %truncv2f16f64 = fptrunc <2 x double> poison to <2 x bfloat>
+  %truncv4f16f64 = fptrunc <4 x double> poison to <4 x bfloat>
+  %truncv8f16f64 = fptrunc <8 x double> poison to <8 x bfloat>
+  %truncv16f16f64 = fptrunc <16 x double> poison to <16 x bfloat>
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll
index 20b83be..9aea58e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll
@@ -7,13 +7,13 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @uitofp() {
 ; CHECK-NONEON-LABEL: 'uitofp'
-; CHECK-NONEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <16 x i64> undef to <16 x float>
+; CHECK-NONEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <16 x i64> poison to <16 x float>
 ; CHECK-NONEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-WITHSVE-LABEL: 'uitofp'
-; CHECK-WITHSVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <16 x i64> undef to <16 x float>
+; CHECK-WITHSVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %conv = uitofp <16 x i64> poison to <16 x float>
 ; CHECK-WITHSVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %conv = uitofp <16 x i64> undef to <16 x float>
+  %conv = uitofp <16 x i64> poison to <16 x float>
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
index cfb130e..ecb4e14 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
@@ -8,1631 +8,1631 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @ext() {
 ; CHECK-SVE-LABEL: 'ext'
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64>
 ; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; SVE128-NO-NEON-LABEL: 'ext'
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; FIXED-MIN-256-LABEL: 'ext'
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i64 = sext <4 x i8> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i8i64 = zext <4 x i8> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i32i64 = zext <4 x i32> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i8i32 = sext <8 x i8> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z8i8i32 = zext <8 x i8> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = sext <16 x i8> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z16i8i16 = zext <16 x i8> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; FIXED-MIN-2048-LABEL: 'ext'
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i64 = sext <4 x i8> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i8i64 = zext <4 x i8> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i32i64 = zext <4 x i32> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i32 = sext <8 x i8> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i8i32 = zext <8 x i8> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i64 = sext <8 x i8> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i8i64 = zext <8 x i8> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i32i64 = zext <8 x i32> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = sext <16 x i8> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i8i16 = zext <16 x i8> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i8i32 = sext <16 x i8> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i8i32 = zext <16 x i8> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i8i64 = sext <16 x i8> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i8i64 = zext <16 x i8> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r0 = sext i1 poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r1 = zext i1 poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r2 = sext i1 poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r3 = zext i1 poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r4 = sext i1 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r5 = zext i1 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r6 = sext i1 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r7 = zext i1 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r9 = sext i8 poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r10 = zext i8 poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r11 = sext i8 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r12 = zext i8 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r13 = sext i8 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r14 = zext i8 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r17 = sext i16 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r18 = zext i16 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r19 = sext i16 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r20 = zext i16 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r24 = sext i32 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r25 = zext i32 poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z2i32i64 = zext <2 x i32> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i16 = sext <4 x i8> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i8i16 = zext <4 x i8> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = sext <4 x i8> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i8i32 = zext <4 x i8> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i64 = sext <4 x i8> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i8i64 = zext <4 x i8> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z4i32i64 = zext <4 x i32> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = sext <8 x i8> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i8i16 = zext <8 x i8> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i32 = sext <8 x i8> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i8i32 = zext <8 x i8> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i64 = sext <8 x i8> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i8i64 = zext <8 x i8> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z8i32i64 = zext <8 x i32> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = sext <16 x i8> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i8i16 = zext <16 x i8> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i8i32 = sext <16 x i8> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i8i32 = zext <16 x i8> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i8i64 = sext <16 x i8> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i8i64 = zext <16 x i8> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %z16i32i64 = zext <16 x i32> poison to <16 x i64>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %r0 = sext i1 undef to i8
-  %r1 = zext i1 undef to i8
-  %r2 = sext i1 undef to i16
-  %r3 = zext i1 undef to i16
-  %r4 = sext i1 undef to i32
-  %r5 = zext i1 undef to i32
-  %r6 = sext i1 undef to i64
-  %r7 = zext i1 undef to i64
-  %r9 = sext i8 undef to i16
-  %r10 = zext i8 undef to i16
-  %r11 = sext i8 undef to i32
-  %r12 = zext i8 undef to i32
-  %r13 = sext i8 undef to i64
-  %r14 = zext i8 undef to i64
-  %r17 = sext i16 undef to i32
-  %r18 = zext i16 undef to i32
-  %r19 = sext i16 undef to i64
-  %r20 = zext i16 undef to i64
-  %r24 = sext i32 undef to i64
-  %r25 = zext i32 undef to i64
+  %r0 = sext i1 poison to i8
+  %r1 = zext i1 poison to i8
+  %r2 = sext i1 poison to i16
+  %r3 = zext i1 poison to i16
+  %r4 = sext i1 poison to i32
+  %r5 = zext i1 poison to i32
+  %r6 = sext i1 poison to i64
+  %r7 = zext i1 poison to i64
+  %r9 = sext i8 poison to i16
+  %r10 = zext i8 poison to i16
+  %r11 = sext i8 poison to i32
+  %r12 = zext i8 poison to i32
+  %r13 = sext i8 poison to i64
+  %r14 = zext i8 poison to i64
+  %r17 = sext i16 poison to i32
+  %r18 = zext i16 poison to i32
+  %r19 = sext i16 poison to i64
+  %r20 = zext i16 poison to i64
+  %r24 = sext i32 poison to i64
+  %r25 = zext i32 poison to i64
 
-  %s2i8i16 = sext <2 x i8> undef to <2 x i16>
-  %z2i8i16 = zext <2 x i8> undef to <2 x i16>
-  %s2i8i32 = sext <2 x i8> undef to <2 x i32>
-  %z2i8i32 = zext <2 x i8> undef to <2 x i32>
-  %s2i8i64 = sext <2 x i8> undef to <2 x i64>
-  %z2i8i64 = zext <2 x i8> undef to <2 x i64>
-  %s2i16i32 = sext <2 x i16> undef to <2 x i32>
-  %z2i16i32 = zext <2 x i16> undef to <2 x i32>
-  %s2i16i64 = sext <2 x i16> undef to <2 x i64>
-  %z2i16i64 = zext <2 x i16> undef to <2 x i64>
-  %s2i32i64 = sext <2 x i32> undef to <2 x i64>
-  %z2i32i64 = zext <2 x i32> undef to <2 x i64>
+  %s2i8i16 = sext <2 x i8> poison to <2 x i16>
+  %z2i8i16 = zext <2 x i8> poison to <2 x i16>
+  %s2i8i32 = sext <2 x i8> poison to <2 x i32>
+  %z2i8i32 = zext <2 x i8> poison to <2 x i32>
+  %s2i8i64 = sext <2 x i8> poison to <2 x i64>
+  %z2i8i64 = zext <2 x i8> poison to <2 x i64>
+  %s2i16i32 = sext <2 x i16> poison to <2 x i32>
+  %z2i16i32 = zext <2 x i16> poison to <2 x i32>
+  %s2i16i64 = sext <2 x i16> poison to <2 x i64>
+  %z2i16i64 = zext <2 x i16> poison to <2 x i64>
+  %s2i32i64 = sext <2 x i32> poison to <2 x i64>
+  %z2i32i64 = zext <2 x i32> poison to <2 x i64>
 
-  %s4i8i16 = sext <4 x i8>  undef to <4 x i16>
-  %z4i8i16 = zext <4 x i8>  undef to <4 x i16>
-  %s4i8i32 = sext <4 x i8>  undef to <4 x i32>
-  %z4i8i32 = zext <4 x i8>  undef to <4 x i32>
-  %s4i8i64 = sext <4 x i8>  undef to <4 x i64>
-  %z4i8i64 = zext <4 x i8>  undef to <4 x i64>
-  %s4i16i32 = sext <4 x i16> undef to <4 x i32>
-  %z4i16i32 = zext <4 x i16> undef to <4 x i32>
-  %s4i16i64 = sext <4 x i16> undef to <4 x i64>
-  %z4i16i64 = zext <4 x i16> undef to <4 x i64>
-  %s4i32i64 = sext <4 x i32> undef to <4 x i64>
-  %z4i32i64 = zext <4 x i32> undef to <4 x i64>
+  %s4i8i16 = sext <4 x i8>  poison to <4 x i16>
+  %z4i8i16 = zext <4 x i8>  poison to <4 x i16>
+  %s4i8i32 = sext <4 x i8>  poison to <4 x i32>
+  %z4i8i32 = zext <4 x i8>  poison to <4 x i32>
+  %s4i8i64 = sext <4 x i8>  poison to <4 x i64>
+  %z4i8i64 = zext <4 x i8>  poison to <4 x i64>
+  %s4i16i32 = sext <4 x i16> poison to <4 x i32>
+  %z4i16i32 = zext <4 x i16> poison to <4 x i32>
+  %s4i16i64 = sext <4 x i16> poison to <4 x i64>
+  %z4i16i64 = zext <4 x i16> poison to <4 x i64>
+  %s4i32i64 = sext <4 x i32> poison to <4 x i64>
+  %z4i32i64 = zext <4 x i32> poison to <4 x i64>
 
-  %s8i8i16 = sext <8 x i8>  undef to <8 x i16>
-  %z8i8i16 = zext <8 x i8>  undef to <8 x i16>
-  %s8i8i32 = sext <8 x i8>  undef to <8 x i32>
-  %z8i8i32 = zext <8 x i8>  undef to <8 x i32>
-  %s8i8i64 = sext <8 x i8>  undef to <8 x i64>
-  %z8i8i64 = zext <8 x i8>  undef to <8 x i64>
-  %s8i16i32 = sext <8 x i16> undef to <8 x i32>
-  %z8i16i32 = zext <8 x i16> undef to <8 x i32>
-  %s8i16i64 = sext <8 x i16> undef to <8 x i64>
-  %z8i16i64 = zext <8 x i16> undef to <8 x i64>
-  %s8i32i64 = sext <8 x i32> undef to <8 x i64>
-  %z8i32i64 = zext <8 x i32> undef to <8 x i64>
+  %s8i8i16 = sext <8 x i8>  poison to <8 x i16>
+  %z8i8i16 = zext <8 x i8>  poison to <8 x i16>
+  %s8i8i32 = sext <8 x i8>  poison to <8 x i32>
+  %z8i8i32 = zext <8 x i8>  poison to <8 x i32>
+  %s8i8i64 = sext <8 x i8>  poison to <8 x i64>
+  %z8i8i64 = zext <8 x i8>  poison to <8 x i64>
+  %s8i16i32 = sext <8 x i16> poison to <8 x i32>
+  %z8i16i32 = zext <8 x i16> poison to <8 x i32>
+  %s8i16i64 = sext <8 x i16> poison to <8 x i64>
+  %z8i16i64 = zext <8 x i16> poison to <8 x i64>
+  %s8i32i64 = sext <8 x i32> poison to <8 x i64>
+  %z8i32i64 = zext <8 x i32> poison to <8 x i64>
 
-  %s16i8i16 = sext <16 x i8>  undef to <16 x i16>
-  %z16i8i16 = zext <16 x i8>  undef to <16 x i16>
-  %s16i8i32 = sext <16 x i8>  undef to <16 x i32>
-  %z16i8i32 = zext <16 x i8>  undef to <16 x i32>
-  %s16i8i64 = sext <16 x i8>  undef to <16 x i64>
-  %z16i8i64 = zext <16 x i8>  undef to <16 x i64>
-  %s16i16i32 = sext <16 x i16> undef to <16 x i32>
-  %z16i16i32 = zext <16 x i16> undef to <16 x i32>
-  %s16i16i64 = sext <16 x i16> undef to <16 x i64>
-  %z16i16i64 = zext <16 x i16> undef to <16 x i64>
-  %s16i32i64 = sext <16 x i32> undef to <16 x i64>
-  %z16i32i64 = zext <16 x i32> undef to <16 x i64>
+  %s16i8i16 = sext <16 x i8>  poison to <16 x i16>
+  %z16i8i16 = zext <16 x i8>  poison to <16 x i16>
+  %s16i8i32 = sext <16 x i8>  poison to <16 x i32>
+  %z16i8i32 = zext <16 x i8>  poison to <16 x i32>
+  %s16i8i64 = sext <16 x i8>  poison to <16 x i64>
+  %z16i8i64 = zext <16 x i8>  poison to <16 x i64>
+  %s16i16i32 = sext <16 x i16> poison to <16 x i32>
+  %z16i16i32 = zext <16 x i16> poison to <16 x i32>
+  %s16i16i64 = sext <16 x i16> poison to <16 x i64>
+  %z16i16i64 = zext <16 x i16> poison to <16 x i64>
+  %s16i32i64 = sext <16 x i32> poison to <16 x i64>
+  %z16i32i64 = zext <16 x i32> poison to <16 x i64>
   ret void
 }
 
 define void @trunc() {
 ; CHECK-SVE-LABEL: 'trunc'
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
 ; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; SVE128-NO-NEON-LABEL: 'trunc'
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 0 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; FIXED-MIN-256-LABEL: 'trunc'
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 0 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; FIXED-MIN-2048-LABEL: 'trunc'
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r8 = trunc i8 poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r15 = trunc i16 poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r16 = trunc i16 poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r21 = trunc i32 poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r22 = trunc i32 poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r23 = trunc i32 poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r26 = trunc i64 poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r27 = trunc i64 poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r28 = trunc i64 poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %r29 = trunc i64 poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 0 for: %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %r8 = trunc i8 undef to i1
-  %r15 = trunc i16 undef to i1
-  %r16 = trunc i16 undef to i8
-  %r21 = trunc i32 undef to i1
-  %r22 = trunc i32 undef to i8
-  %r23 = trunc i32 undef to i16
-  %r26 = trunc i64 undef to i1
-  %r27 = trunc i64 undef to i8
-  %r28 = trunc i64 undef to i16
-  %r29 = trunc i64 undef to i32
+  %r8 = trunc i8 poison to i1
+  %r15 = trunc i16 poison to i1
+  %r16 = trunc i16 poison to i8
+  %r21 = trunc i32 poison to i1
+  %r22 = trunc i32 poison to i8
+  %r23 = trunc i32 poison to i16
+  %r26 = trunc i64 poison to i1
+  %r27 = trunc i64 poison to i8
+  %r28 = trunc i64 poison to i16
+  %r29 = trunc i64 poison to i32
 
-  %s2i8i16 = trunc <2 x i16> undef to <2 x i8>
-  %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
-  %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
-  %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-  %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-  %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
+  %s2i8i16 = trunc <2 x i16> poison to <2 x i8>
+  %s2i8i32 = trunc <2 x i32> poison to <2 x i8>
+  %s2i8i64 = trunc <2 x i64> poison to <2 x i8>
+  %s2i16i32 = trunc <2 x i32> poison to <2 x i16>
+  %s2i16i64 = trunc <2 x i64> poison to <2 x i16>
+  %s2i32i64 = trunc <2 x i64> poison to <2 x i32>
 
-  %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-  %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-  %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
-  %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-  %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-  %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+  %s4i8i16 = trunc <4 x i16> poison to <4 x i8>
+  %s4i8i32 = trunc <4 x i32> poison to <4 x i8>
+  %s4i8i64 = trunc <4 x i64> poison to <4 x i8>
+  %s4i16i32 = trunc <4 x i32> poison to <4 x i16>
+  %s4i16i64 = trunc <4 x i64> poison to <4 x i16>
+  %s4i32i64 = trunc <4 x i64> poison to <4 x i32>
 
-  %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-  %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-  %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-  %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-  %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-  %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+  %s8i8i16 = trunc <8 x i16> poison to <8 x i8>
+  %s8i8i32 = trunc <8 x i32> poison to <8 x i8>
+  %s8i8i64 = trunc <8 x i64> poison to <8 x i8>
+  %s8i16i32 = trunc <8 x i32> poison to <8 x i16>
+  %s8i16i64 = trunc <8 x i64> poison to <8 x i16>
+  %s8i32i64 = trunc <8 x i64> poison to <8 x i32>
 
-  %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-  %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-  %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-  %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-  %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-  %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+  %s16i8i16 = trunc <16 x i16> poison to <16 x i8>
+  %s16i8i32 = trunc <16 x i32> poison to <16 x i8>
+  %s16i8i64 = trunc <16 x i64> poison to <16 x i8>
+  %s16i16i32 = trunc <16 x i32> poison to <16 x i16>
+  %s16i16i64 = trunc <16 x i64> poison to <16 x i16>
+  %s16i32i64 = trunc <16 x i64> poison to <16 x i32>
   ret void
 }
 
 define i32 @casts_no_users() {
 ; CHECK-SVE-LABEL: 'casts_no_users'
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double undef to float
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float undef to double
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> undef to <2 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> undef to <2 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> undef to <2 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> undef to <2 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> undef to <4 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> undef to <4 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> undef to <4 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> undef to <4 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> undef to <8 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> undef to <8 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> undef to <8 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> undef to <8 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> undef to <16 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> undef to <16 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> undef to <16 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> undef to <16 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> undef to <16 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> undef to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float poison to double
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> poison to <4 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> poison to <4 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> poison to <8 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:41 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> poison to <8 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:83 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:87 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:23 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> poison to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> poison to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double>
 ; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; SVE128-NO-NEON-LABEL: 'casts_no_users'
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double undef to float
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float undef to double
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> undef to <2 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> undef to <2 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r98 = fptoui <2 x float> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r99 = fptosi <2 x float> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r100 = fptoui <2 x double> undef to <2 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r101 = fptosi <2 x double> undef to <2 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r102 = fptoui <2 x double> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r103 = fptosi <2 x double> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r104 = fptoui <2 x double> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r105 = fptosi <2 x double> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r106 = fptoui <2 x double> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r107 = fptosi <2 x double> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x float> undef to <4 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x float> undef to <4 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x float> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x float> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x float> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x float> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> undef to <4 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> undef to <4 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> undef to <8 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> undef to <8 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> undef to <8 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> undef to <8 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> undef to <16 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> undef to <16 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> undef to <16 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> undef to <16 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r174 = uitofp <2 x i16> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r184 = uitofp <2 x i16> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r185 = sitofp <2 x i16> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r186 = uitofp <2 x i32> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r187 = sitofp <2 x i32> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> undef to <2 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r194 = uitofp <4 x i16> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> undef to <4 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> undef to <8 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> undef to <16 x double>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> undef to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r82 = fptrunc <4 x double> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float poison to double
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r87 = fpext <4 x float> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r98 = fptoui <2 x float> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r99 = fptosi <2 x float> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r100 = fptoui <2 x double> poison to <2 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r101 = fptosi <2 x double> poison to <2 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r102 = fptoui <2 x double> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r103 = fptosi <2 x double> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r104 = fptoui <2 x double> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r105 = fptosi <2 x double> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r106 = fptoui <2 x double> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r107 = fptosi <2 x double> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x float> poison to <4 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x float> poison to <4 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x float> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x float> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x float> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x float> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x float> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x float> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r120 = fptoui <4 x double> poison to <4 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r121 = fptosi <4 x double> poison to <4 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r122 = fptoui <4 x double> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r123 = fptosi <4 x double> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r124 = fptoui <4 x double> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r125 = fptosi <4 x double> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r126 = fptoui <4 x double> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r127 = fptosi <4 x double> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r128 = fptoui <4 x double> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r129 = fptosi <4 x double> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x float> poison to <8 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x float> poison to <8 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x float> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x float> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r134 = fptoui <8 x float> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r135 = fptosi <8 x float> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x float> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x float> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r174 = uitofp <2 x i16> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r175 = sitofp <2 x i16> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r178 = uitofp <2 x i64> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r179 = sitofp <2 x i64> poison to <2 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r184 = uitofp <2 x i16> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r185 = sitofp <2 x i16> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r186 = uitofp <2 x i32> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r187 = sitofp <2 x i32> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r194 = uitofp <4 x i16> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r195 = sitofp <4 x i16> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> poison to <4 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r204 = uitofp <4 x i16> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r205 = sitofp <4 x i16> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r206 = uitofp <4 x i32> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r207 = sitofp <4 x i32> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r208 = uitofp <4 x i64> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r209 = sitofp <4 x i64> poison to <4 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r210 = uitofp <8 x i1> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r211 = sitofp <8 x i1> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r214 = uitofp <8 x i16> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; FIXED-MIN-256-LABEL: 'casts_no_users'
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double undef to float
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r82 = fptrunc <4 x double> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float undef to double
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r87 = fpext <4 x float> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> undef to <2 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> undef to <2 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> undef to <2 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> undef to <2 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> undef to <4 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> undef to <4 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x float> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x float> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r120 = fptoui <4 x double> undef to <4 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r121 = fptosi <4 x double> undef to <4 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r122 = fptoui <4 x double> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r123 = fptosi <4 x double> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r124 = fptoui <4 x double> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r125 = fptosi <4 x double> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r126 = fptoui <4 x double> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r127 = fptosi <4 x double> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r128 = fptoui <4 x double> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r129 = fptosi <4 x double> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r130 = fptoui <8 x float> undef to <8 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r131 = fptosi <8 x float> undef to <8 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r132 = fptoui <8 x float> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r133 = fptosi <8 x float> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x float> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x float> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x float> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x float> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> undef to <8 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> undef to <8 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> undef to <16 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> undef to <16 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> undef to <16 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> undef to <16 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> undef to <2 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r204 = uitofp <4 x i16> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r205 = sitofp <4 x i16> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r206 = uitofp <4 x i32> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r207 = sitofp <4 x i32> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r208 = uitofp <4 x i64> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r209 = sitofp <4 x i64> undef to <4 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r210 = uitofp <8 x i1> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r211 = sitofp <8 x i1> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r214 = uitofp <8 x i16> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> undef to <8 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> undef to <16 x double>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> undef to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r82 = fptrunc <4 x double> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r83 = fptrunc <8 x double> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r84 = fptrunc <16 x double> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float poison to double
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r87 = fpext <4 x float> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r88 = fpext <8 x float> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r89 = fpext <16 x float> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x float> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x float> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r120 = fptoui <4 x double> poison to <4 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r121 = fptosi <4 x double> poison to <4 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r122 = fptoui <4 x double> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r123 = fptosi <4 x double> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r124 = fptoui <4 x double> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r125 = fptosi <4 x double> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r126 = fptoui <4 x double> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r127 = fptosi <4 x double> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r128 = fptoui <4 x double> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r129 = fptosi <4 x double> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r130 = fptoui <8 x float> poison to <8 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r131 = fptosi <8 x float> poison to <8 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r132 = fptoui <8 x float> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r133 = fptosi <8 x float> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x float> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x float> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x float> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x float> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x float> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x float> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r140 = fptoui <8 x double> poison to <8 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r141 = fptosi <8 x double> poison to <8 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r142 = fptoui <8 x double> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r143 = fptosi <8 x double> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r144 = fptoui <8 x double> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r145 = fptosi <8 x double> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r146 = fptoui <8 x double> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r147 = fptosi <8 x double> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r148 = fptoui <8 x double> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r149 = fptosi <8 x double> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x float> poison to <16 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x float> poison to <16 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x float> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x float> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x float> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x float> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x float> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x float> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x float> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x float> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r160 = fptoui <16 x double> poison to <16 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r161 = fptosi <16 x double> poison to <16 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r162 = fptoui <16 x double> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r163 = fptosi <16 x double> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r164 = fptoui <16 x double> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r165 = fptosi <16 x double> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r166 = fptoui <16 x double> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r167 = fptosi <16 x double> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r168 = fptoui <16 x double> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r169 = fptosi <16 x double> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r198 = uitofp <4 x i64> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r199 = sitofp <4 x i64> poison to <4 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r200 = uitofp <4 x i1> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r201 = sitofp <4 x i1> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r204 = uitofp <4 x i16> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r205 = sitofp <4 x i16> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r206 = uitofp <4 x i32> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r207 = sitofp <4 x i32> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r208 = uitofp <4 x i64> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r209 = sitofp <4 x i64> poison to <4 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r210 = uitofp <8 x i1> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r211 = sitofp <8 x i1> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r214 = uitofp <8 x i16> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r215 = sitofp <8 x i16> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r216 = uitofp <8 x i32> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r217 = sitofp <8 x i32> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> poison to <8 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r224 = uitofp <8 x i16> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r225 = sitofp <8 x i16> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r226 = uitofp <8 x i32> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r227 = sitofp <8 x i32> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r228 = uitofp <8 x i64> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r229 = sitofp <8 x i64> poison to <8 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r230 = uitofp <16 x i1> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r231 = sitofp <16 x i1> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r234 = uitofp <16 x i16> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> poison to <16 x float>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r244 = uitofp <16 x i16> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r245 = sitofp <16 x i16> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r246 = uitofp <16 x i32> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r247 = sitofp <16 x i32> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r248 = uitofp <16 x i64> poison to <16 x double>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r249 = sitofp <16 x i64> poison to <16 x double>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
 ; FIXED-MIN-2048-LABEL: 'casts_no_users'
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double undef to float
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r82 = fptrunc <4 x double> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r83 = fptrunc <8 x double> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r84 = fptrunc <16 x double> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float undef to double
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r87 = fpext <4 x float> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r88 = fpext <8 x float> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r89 = fpext <16 x float> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> undef to <2 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> undef to <2 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> undef to <2 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> undef to <2 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> undef to <4 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> undef to <4 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x float> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x float> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r120 = fptoui <4 x double> undef to <4 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r121 = fptosi <4 x double> undef to <4 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r122 = fptoui <4 x double> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r123 = fptosi <4 x double> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r124 = fptoui <4 x double> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r125 = fptosi <4 x double> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r126 = fptoui <4 x double> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r127 = fptosi <4 x double> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r128 = fptoui <4 x double> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r129 = fptosi <4 x double> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r130 = fptoui <8 x float> undef to <8 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r131 = fptosi <8 x float> undef to <8 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r132 = fptoui <8 x float> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r133 = fptosi <8 x float> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x float> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x float> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x float> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x float> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r138 = fptoui <8 x float> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r139 = fptosi <8 x float> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r140 = fptoui <8 x double> undef to <8 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r141 = fptosi <8 x double> undef to <8 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r142 = fptoui <8 x double> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r143 = fptosi <8 x double> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r144 = fptoui <8 x double> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r145 = fptosi <8 x double> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r146 = fptoui <8 x double> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r147 = fptosi <8 x double> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r148 = fptoui <8 x double> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r149 = fptosi <8 x double> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r150 = fptoui <16 x float> undef to <16 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r151 = fptosi <16 x float> undef to <16 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r152 = fptoui <16 x float> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r153 = fptosi <16 x float> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r154 = fptoui <16 x float> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r155 = fptosi <16 x float> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r156 = fptoui <16 x float> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r157 = fptosi <16 x float> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r158 = fptoui <16 x float> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r159 = fptosi <16 x float> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r160 = fptoui <16 x double> undef to <16 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r161 = fptosi <16 x double> undef to <16 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r162 = fptoui <16 x double> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r163 = fptosi <16 x double> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r164 = fptoui <16 x double> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r165 = fptosi <16 x double> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r166 = fptoui <16 x double> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r167 = fptosi <16 x double> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r168 = fptoui <16 x double> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r169 = fptosi <16 x double> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> undef to <2 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r198 = uitofp <4 x i64> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r199 = sitofp <4 x i64> undef to <4 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r200 = uitofp <4 x i1> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r201 = sitofp <4 x i1> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r204 = uitofp <4 x i16> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r205 = sitofp <4 x i16> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r206 = uitofp <4 x i32> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r207 = sitofp <4 x i32> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r208 = uitofp <4 x i64> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r209 = sitofp <4 x i64> undef to <4 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r210 = uitofp <8 x i1> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r211 = sitofp <8 x i1> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r214 = uitofp <8 x i16> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r215 = sitofp <8 x i16> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r216 = uitofp <8 x i32> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r217 = sitofp <8 x i32> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r218 = uitofp <8 x i64> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r219 = sitofp <8 x i64> undef to <8 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r220 = uitofp <8 x i1> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r221 = sitofp <8 x i1> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r224 = uitofp <8 x i16> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r225 = sitofp <8 x i16> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r226 = uitofp <8 x i32> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r227 = sitofp <8 x i32> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r228 = uitofp <8 x i64> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r229 = sitofp <8 x i64> undef to <8 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r230 = uitofp <16 x i1> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r231 = sitofp <16 x i1> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r234 = uitofp <16 x i16> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r235 = sitofp <16 x i16> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r236 = uitofp <16 x i32> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r237 = sitofp <16 x i32> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r238 = uitofp <16 x i64> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r239 = sitofp <16 x i64> undef to <16 x float>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r240 = uitofp <16 x i1> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r241 = sitofp <16 x i1> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r244 = uitofp <16 x i16> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r245 = sitofp <16 x i16> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r246 = uitofp <16 x i32> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r247 = sitofp <16 x i32> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r248 = uitofp <16 x i64> undef to <16 x double>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r249 = sitofp <16 x i64> undef to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui float poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi float poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui float poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi float poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui float poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi float poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui float poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi float poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui float poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi float poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r40 = fptoui double poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r41 = fptosi double poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r42 = fptoui double poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r43 = fptosi double poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r44 = fptoui double poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r45 = fptosi double poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r46 = fptoui double poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r47 = fptosi double poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r48 = fptoui double poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r49 = fptosi double poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r50 = sitofp i1 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r51 = uitofp i1 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r52 = sitofp i1 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r53 = uitofp i1 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r54 = sitofp i8 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r55 = uitofp i8 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r56 = sitofp i8 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r57 = uitofp i8 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r58 = sitofp i16 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r59 = uitofp i16 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r60 = sitofp i16 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r61 = uitofp i16 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r62 = sitofp i32 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r63 = uitofp i32 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r64 = sitofp i32 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r65 = uitofp i32 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r66 = sitofp i64 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r67 = uitofp i64 poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r68 = sitofp i64 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r69 = uitofp i64 poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r80 = fptrunc double poison to float
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r81 = fptrunc <2 x double> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r82 = fptrunc <4 x double> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r83 = fptrunc <8 x double> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r84 = fptrunc <16 x double> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r85 = fpext float poison to double
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r86 = fpext <2 x float> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r87 = fpext <4 x float> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r88 = fpext <8 x float> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r89 = fpext <16 x float> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x float> poison to <2 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x float> poison to <2 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x float> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x float> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x float> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x float> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x float> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x float> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x float> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x float> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r100 = fptoui <2 x double> poison to <2 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r101 = fptosi <2 x double> poison to <2 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r102 = fptoui <2 x double> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r103 = fptosi <2 x double> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r104 = fptoui <2 x double> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r105 = fptosi <2 x double> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r106 = fptoui <2 x double> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r107 = fptosi <2 x double> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r108 = fptoui <2 x double> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r109 = fptosi <2 x double> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r110 = fptoui <4 x float> poison to <4 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r111 = fptosi <4 x float> poison to <4 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r112 = fptoui <4 x float> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r113 = fptosi <4 x float> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r114 = fptoui <4 x float> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r115 = fptosi <4 x float> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x float> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x float> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x float> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x float> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r120 = fptoui <4 x double> poison to <4 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r121 = fptosi <4 x double> poison to <4 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r122 = fptoui <4 x double> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r123 = fptosi <4 x double> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r124 = fptoui <4 x double> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r125 = fptosi <4 x double> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r126 = fptoui <4 x double> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r127 = fptosi <4 x double> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r128 = fptoui <4 x double> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r129 = fptosi <4 x double> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r130 = fptoui <8 x float> poison to <8 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r131 = fptosi <8 x float> poison to <8 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r132 = fptoui <8 x float> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r133 = fptosi <8 x float> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x float> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x float> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x float> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x float> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r138 = fptoui <8 x float> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r139 = fptosi <8 x float> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r140 = fptoui <8 x double> poison to <8 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r141 = fptosi <8 x double> poison to <8 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r142 = fptoui <8 x double> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r143 = fptosi <8 x double> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r144 = fptoui <8 x double> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r145 = fptosi <8 x double> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r146 = fptoui <8 x double> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r147 = fptosi <8 x double> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r148 = fptoui <8 x double> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r149 = fptosi <8 x double> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r150 = fptoui <16 x float> poison to <16 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r151 = fptosi <16 x float> poison to <16 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r152 = fptoui <16 x float> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r153 = fptosi <16 x float> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r154 = fptoui <16 x float> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r155 = fptosi <16 x float> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r156 = fptoui <16 x float> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r157 = fptosi <16 x float> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r158 = fptoui <16 x float> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r159 = fptosi <16 x float> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r160 = fptoui <16 x double> poison to <16 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r161 = fptosi <16 x double> poison to <16 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r162 = fptoui <16 x double> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r163 = fptosi <16 x double> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r164 = fptoui <16 x double> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r165 = fptosi <16 x double> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r166 = fptoui <16 x double> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r167 = fptosi <16 x double> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r168 = fptoui <16 x double> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r169 = fptosi <16 x double> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r170 = uitofp <2 x i1> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r171 = sitofp <2 x i1> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r172 = uitofp <2 x i8> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r173 = sitofp <2 x i8> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r174 = uitofp <2 x i16> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> poison to <2 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r183 = sitofp <2 x i8> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r184 = uitofp <2 x i16> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r185 = sitofp <2 x i16> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r186 = uitofp <2 x i32> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r187 = sitofp <2 x i32> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r188 = uitofp <2 x i64> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r189 = sitofp <2 x i64> poison to <2 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r190 = uitofp <4 x i1> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r191 = sitofp <4 x i1> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r192 = uitofp <4 x i8> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r193 = sitofp <4 x i8> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r194 = uitofp <4 x i16> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r198 = uitofp <4 x i64> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r199 = sitofp <4 x i64> poison to <4 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r200 = uitofp <4 x i1> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r201 = sitofp <4 x i1> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r203 = sitofp <4 x i8> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r204 = uitofp <4 x i16> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r205 = sitofp <4 x i16> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r206 = uitofp <4 x i32> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r207 = sitofp <4 x i32> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r208 = uitofp <4 x i64> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r209 = sitofp <4 x i64> poison to <4 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r210 = uitofp <8 x i1> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r211 = sitofp <8 x i1> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r212 = uitofp <8 x i8> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r213 = sitofp <8 x i8> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r214 = uitofp <8 x i16> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r215 = sitofp <8 x i16> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r216 = uitofp <8 x i32> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r217 = sitofp <8 x i32> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r218 = uitofp <8 x i64> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r219 = sitofp <8 x i64> poison to <8 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r220 = uitofp <8 x i1> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r221 = sitofp <8 x i1> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r223 = sitofp <8 x i8> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r224 = uitofp <8 x i16> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r225 = sitofp <8 x i16> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r226 = uitofp <8 x i32> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r227 = sitofp <8 x i32> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r228 = uitofp <8 x i64> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r229 = sitofp <8 x i64> poison to <8 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r230 = uitofp <16 x i1> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r231 = sitofp <16 x i1> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r232 = uitofp <16 x i8> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r233 = sitofp <16 x i8> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r234 = uitofp <16 x i16> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r235 = sitofp <16 x i16> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r236 = uitofp <16 x i32> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r237 = sitofp <16 x i32> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r238 = uitofp <16 x i64> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r239 = sitofp <16 x i64> poison to <16 x float>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r240 = uitofp <16 x i1> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r241 = sitofp <16 x i1> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r243 = sitofp <16 x i8> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r244 = uitofp <16 x i16> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r245 = sitofp <16 x i16> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r246 = uitofp <16 x i32> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r247 = sitofp <16 x i32> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r248 = uitofp <16 x i64> poison to <16 x double>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r249 = sitofp <16 x i64> poison to <16 x double>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-  %r30 = fptoui float undef to i1
-  %r31 = fptosi float undef to i1
-  %r32 = fptoui float undef to i8
-  %r33 = fptosi float undef to i8
-  %r34 = fptoui float undef to i16
-  %r35 = fptosi float undef to i16
-  %r36 = fptoui float undef to i32
-  %r37 = fptosi float undef to i32
-  %r38 = fptoui float undef to i64
-  %r39 = fptosi float undef to i64
-  %r40 = fptoui double undef to i1
-  %r41 = fptosi double undef to i1
-  %r42 = fptoui double undef to i8
-  %r43 = fptosi double undef to i8
-  %r44 = fptoui double undef to i16
-  %r45 = fptosi double undef to i16
-  %r46 = fptoui double undef to i32
-  %r47 = fptosi double undef to i32
-  %r48 = fptoui double undef to i64
-  %r49 = fptosi double undef to i64
-  %r50 = sitofp i1 undef to float
-  %r51 = uitofp i1 undef to float
-  %r52 = sitofp i1 undef to double
-  %r53 = uitofp i1 undef to double
-  %r54 = sitofp i8 undef to float
-  %r55 = uitofp i8 undef to float
-  %r56 = sitofp i8 undef to double
-  %r57 = uitofp i8 undef to double
-  %r58 = sitofp i16 undef to float
-  %r59 = uitofp i16 undef to float
-  %r60 = sitofp i16 undef to double
-  %r61 = uitofp i16 undef to double
-  %r62 = sitofp i32 undef to float
-  %r63 = uitofp i32 undef to float
-  %r64 = sitofp i32 undef to double
-  %r65 = uitofp i32 undef to double
-  %r66 = sitofp i64 undef to float
-  %r67 = uitofp i64 undef to float
-  %r68 = sitofp i64 undef to double
-  %r69 = uitofp i64 undef to double
-  %r80 = fptrunc double undef to float
-  %r81 = fptrunc <2 x double> undef to <2 x float>
-  %r82 = fptrunc <4 x double> undef to <4 x float>
-  %r83 = fptrunc <8 x double> undef to <8 x float>
-  %r84 = fptrunc <16 x double> undef to <16 x float>
-  %r85 = fpext float undef to double
-  %r86 = fpext <2 x float> undef to <2 x double>
-  %r87 = fpext <4 x float> undef to <4 x double>
-  %r88 = fpext <8 x float> undef to <8 x double>
-  %r89 = fpext <16 x float> undef to <16 x double>
-  %r90 = fptoui <2 x float> undef to <2 x i1>
-  %r91 = fptosi <2 x float> undef to <2 x i1>
-  %r92 = fptoui <2 x float> undef to <2 x i8>
-  %r93 = fptosi <2 x float> undef to <2 x i8>
-  %r94 = fptoui <2 x float> undef to <2 x i16>
-  %r95 = fptosi <2 x float> undef to <2 x i16>
-  %r96 = fptoui <2 x float> undef to <2 x i32>
-  %r97 = fptosi <2 x float> undef to <2 x i32>
-  %r98 = fptoui <2 x float> undef to <2 x i64>
-  %r99 = fptosi <2 x float> undef to <2 x i64>
-  %r100 = fptoui <2 x double> undef to <2 x i1>
-  %r101 = fptosi <2 x double> undef to <2 x i1>
-  %r102 = fptoui <2 x double> undef to <2 x i8>
-  %r103 = fptosi <2 x double> undef to <2 x i8>
-  %r104 = fptoui <2 x double> undef to <2 x i16>
-  %r105 = fptosi <2 x double> undef to <2 x i16>
-  %r106 = fptoui <2 x double> undef to <2 x i32>
-  %r107 = fptosi <2 x double> undef to <2 x i32>
-  %r108 = fptoui <2 x double> undef to <2 x i64>
-  %r109 = fptosi <2 x double> undef to <2 x i64>
+  %r30 = fptoui float poison to i1
+  %r31 = fptosi float poison to i1
+  %r32 = fptoui float poison to i8
+  %r33 = fptosi float poison to i8
+  %r34 = fptoui float poison to i16
+  %r35 = fptosi float poison to i16
+  %r36 = fptoui float poison to i32
+  %r37 = fptosi float poison to i32
+  %r38 = fptoui float poison to i64
+  %r39 = fptosi float poison to i64
+  %r40 = fptoui double poison to i1
+  %r41 = fptosi double poison to i1
+  %r42 = fptoui double poison to i8
+  %r43 = fptosi double poison to i8
+  %r44 = fptoui double poison to i16
+  %r45 = fptosi double poison to i16
+  %r46 = fptoui double poison to i32
+  %r47 = fptosi double poison to i32
+  %r48 = fptoui double poison to i64
+  %r49 = fptosi double poison to i64
+  %r50 = sitofp i1 poison to float
+  %r51 = uitofp i1 poison to float
+  %r52 = sitofp i1 poison to double
+  %r53 = uitofp i1 poison to double
+  %r54 = sitofp i8 poison to float
+  %r55 = uitofp i8 poison to float
+  %r56 = sitofp i8 poison to double
+  %r57 = uitofp i8 poison to double
+  %r58 = sitofp i16 poison to float
+  %r59 = uitofp i16 poison to float
+  %r60 = sitofp i16 poison to double
+  %r61 = uitofp i16 poison to double
+  %r62 = sitofp i32 poison to float
+  %r63 = uitofp i32 poison to float
+  %r64 = sitofp i32 poison to double
+  %r65 = uitofp i32 poison to double
+  %r66 = sitofp i64 poison to float
+  %r67 = uitofp i64 poison to float
+  %r68 = sitofp i64 poison to double
+  %r69 = uitofp i64 poison to double
+  %r80 = fptrunc double poison to float
+  %r81 = fptrunc <2 x double> poison to <2 x float>
+  %r82 = fptrunc <4 x double> poison to <4 x float>
+  %r83 = fptrunc <8 x double> poison to <8 x float>
+  %r84 = fptrunc <16 x double> poison to <16 x float>
+  %r85 = fpext float poison to double
+  %r86 = fpext <2 x float> poison to <2 x double>
+  %r87 = fpext <4 x float> poison to <4 x double>
+  %r88 = fpext <8 x float> poison to <8 x double>
+  %r89 = fpext <16 x float> poison to <16 x double>
+  %r90 = fptoui <2 x float> poison to <2 x i1>
+  %r91 = fptosi <2 x float> poison to <2 x i1>
+  %r92 = fptoui <2 x float> poison to <2 x i8>
+  %r93 = fptosi <2 x float> poison to <2 x i8>
+  %r94 = fptoui <2 x float> poison to <2 x i16>
+  %r95 = fptosi <2 x float> poison to <2 x i16>
+  %r96 = fptoui <2 x float> poison to <2 x i32>
+  %r97 = fptosi <2 x float> poison to <2 x i32>
+  %r98 = fptoui <2 x float> poison to <2 x i64>
+  %r99 = fptosi <2 x float> poison to <2 x i64>
+  %r100 = fptoui <2 x double> poison to <2 x i1>
+  %r101 = fptosi <2 x double> poison to <2 x i1>
+  %r102 = fptoui <2 x double> poison to <2 x i8>
+  %r103 = fptosi <2 x double> poison to <2 x i8>
+  %r104 = fptoui <2 x double> poison to <2 x i16>
+  %r105 = fptosi <2 x double> poison to <2 x i16>
+  %r106 = fptoui <2 x double> poison to <2 x i32>
+  %r107 = fptosi <2 x double> poison to <2 x i32>
+  %r108 = fptoui <2 x double> poison to <2 x i64>
+  %r109 = fptosi <2 x double> poison to <2 x i64>
 
-  %r110 = fptoui <4 x float> undef to <4 x i1>
-  %r111 = fptosi <4 x float> undef to <4 x i1>
-  %r112 = fptoui <4 x float> undef to <4 x i8>
-  %r113 = fptosi <4 x float> undef to <4 x i8>
-  %r114 = fptoui <4 x float> undef to <4 x i16>
-  %r115 = fptosi <4 x float> undef to <4 x i16>
-  %r116 = fptoui <4 x float> undef to <4 x i32>
-  %r117 = fptosi <4 x float> undef to <4 x i32>
-  %r118 = fptoui <4 x float> undef to <4 x i64>
-  %r119 = fptosi <4 x float> undef to <4 x i64>
+  %r110 = fptoui <4 x float> poison to <4 x i1>
+  %r111 = fptosi <4 x float> poison to <4 x i1>
+  %r112 = fptoui <4 x float> poison to <4 x i8>
+  %r113 = fptosi <4 x float> poison to <4 x i8>
+  %r114 = fptoui <4 x float> poison to <4 x i16>
+  %r115 = fptosi <4 x float> poison to <4 x i16>
+  %r116 = fptoui <4 x float> poison to <4 x i32>
+  %r117 = fptosi <4 x float> poison to <4 x i32>
+  %r118 = fptoui <4 x float> poison to <4 x i64>
+  %r119 = fptosi <4 x float> poison to <4 x i64>
 
-  %r120 = fptoui <4 x double> undef to <4 x i1>
-  %r121 = fptosi <4 x double> undef to <4 x i1>
-  %r122 = fptoui <4 x double> undef to <4 x i8>
-  %r123 = fptosi <4 x double> undef to <4 x i8>
-  %r124 = fptoui <4 x double> undef to <4 x i16>
-  %r125 = fptosi <4 x double> undef to <4 x i16>
-  %r126 = fptoui <4 x double> undef to <4 x i32>
-  %r127 = fptosi <4 x double> undef to <4 x i32>
-  %r128 = fptoui <4 x double> undef to <4 x i64>
-  %r129 = fptosi <4 x double> undef to <4 x i64>
+  %r120 = fptoui <4 x double> poison to <4 x i1>
+  %r121 = fptosi <4 x double> poison to <4 x i1>
+  %r122 = fptoui <4 x double> poison to <4 x i8>
+  %r123 = fptosi <4 x double> poison to <4 x i8>
+  %r124 = fptoui <4 x double> poison to <4 x i16>
+  %r125 = fptosi <4 x double> poison to <4 x i16>
+  %r126 = fptoui <4 x double> poison to <4 x i32>
+  %r127 = fptosi <4 x double> poison to <4 x i32>
+  %r128 = fptoui <4 x double> poison to <4 x i64>
+  %r129 = fptosi <4 x double> poison to <4 x i64>
 
-  %r130 = fptoui <8 x float> undef to <8 x i1>
-  %r131 = fptosi <8 x float> undef to <8 x i1>
-  %r132 = fptoui <8 x float> undef to <8 x i8>
-  %r133 = fptosi <8 x float> undef to <8 x i8>
-  %r134 = fptoui <8 x float> undef to <8 x i16>
-  %r135 = fptosi <8 x float> undef to <8 x i16>
-  %r136 = fptoui <8 x float> undef to <8 x i32>
-  %r137 = fptosi <8 x float> undef to <8 x i32>
-  %r138 = fptoui <8 x float> undef to <8 x i64>
-  %r139 = fptosi <8 x float> undef to <8 x i64>
+  %r130 = fptoui <8 x float> poison to <8 x i1>
+  %r131 = fptosi <8 x float> poison to <8 x i1>
+  %r132 = fptoui <8 x float> poison to <8 x i8>
+  %r133 = fptosi <8 x float> poison to <8 x i8>
+  %r134 = fptoui <8 x float> poison to <8 x i16>
+  %r135 = fptosi <8 x float> poison to <8 x i16>
+  %r136 = fptoui <8 x float> poison to <8 x i32>
+  %r137 = fptosi <8 x float> poison to <8 x i32>
+  %r138 = fptoui <8 x float> poison to <8 x i64>
+  %r139 = fptosi <8 x float> poison to <8 x i64>
 
-  %r140 = fptoui <8 x double> undef to <8 x i1>
-  %r141 = fptosi <8 x double> undef to <8 x i1>
-  %r142 = fptoui <8 x double> undef to <8 x i8>
-  %r143 = fptosi <8 x double> undef to <8 x i8>
-  %r144 = fptoui <8 x double> undef to <8 x i16>
-  %r145 = fptosi <8 x double> undef to <8 x i16>
-  %r146 = fptoui <8 x double> undef to <8 x i32>
-  %r147 = fptosi <8 x double> undef to <8 x i32>
-  %r148 = fptoui <8 x double> undef to <8 x i64>
-  %r149 = fptosi <8 x double> undef to <8 x i64>
+  %r140 = fptoui <8 x double> poison to <8 x i1>
+  %r141 = fptosi <8 x double> poison to <8 x i1>
+  %r142 = fptoui <8 x double> poison to <8 x i8>
+  %r143 = fptosi <8 x double> poison to <8 x i8>
+  %r144 = fptoui <8 x double> poison to <8 x i16>
+  %r145 = fptosi <8 x double> poison to <8 x i16>
+  %r146 = fptoui <8 x double> poison to <8 x i32>
+  %r147 = fptosi <8 x double> poison to <8 x i32>
+  %r148 = fptoui <8 x double> poison to <8 x i64>
+  %r149 = fptosi <8 x double> poison to <8 x i64>
 
-  %r150 = fptoui <16 x float> undef to <16 x i1>
-  %r151 = fptosi <16 x float> undef to <16 x i1>
-  %r152 = fptoui <16 x float> undef to <16 x i8>
-  %r153 = fptosi <16 x float> undef to <16 x i8>
-  %r154 = fptoui <16 x float> undef to <16 x i16>
-  %r155 = fptosi <16 x float> undef to <16 x i16>
-  %r156 = fptoui <16 x float> undef to <16 x i32>
-  %r157 = fptosi <16 x float> undef to <16 x i32>
-  %r158 = fptoui <16 x float> undef to <16 x i64>
-  %r159 = fptosi <16 x float> undef to <16 x i64>
+  %r150 = fptoui <16 x float> poison to <16 x i1>
+  %r151 = fptosi <16 x float> poison to <16 x i1>
+  %r152 = fptoui <16 x float> poison to <16 x i8>
+  %r153 = fptosi <16 x float> poison to <16 x i8>
+  %r154 = fptoui <16 x float> poison to <16 x i16>
+  %r155 = fptosi <16 x float> poison to <16 x i16>
+  %r156 = fptoui <16 x float> poison to <16 x i32>
+  %r157 = fptosi <16 x float> poison to <16 x i32>
+  %r158 = fptoui <16 x float> poison to <16 x i64>
+  %r159 = fptosi <16 x float> poison to <16 x i64>
 
-  %r160 = fptoui <16 x double> undef to <16 x i1>
-  %r161 = fptosi <16 x double> undef to <16 x i1>
-  %r162 = fptoui <16 x double> undef to <16 x i8>
-  %r163 = fptosi <16 x double> undef to <16 x i8>
-  %r164 = fptoui <16 x double> undef to <16 x i16>
-  %r165 = fptosi <16 x double> undef to <16 x i16>
-  %r166 = fptoui <16 x double> undef to <16 x i32>
-  %r167 = fptosi <16 x double> undef to <16 x i32>
-  %r168 = fptoui <16 x double> undef to <16 x i64>
-  %r169 = fptosi <16 x double> undef to <16 x i64>
+  %r160 = fptoui <16 x double> poison to <16 x i1>
+  %r161 = fptosi <16 x double> poison to <16 x i1>
+  %r162 = fptoui <16 x double> poison to <16 x i8>
+  %r163 = fptosi <16 x double> poison to <16 x i8>
+  %r164 = fptoui <16 x double> poison to <16 x i16>
+  %r165 = fptosi <16 x double> poison to <16 x i16>
+  %r166 = fptoui <16 x double> poison to <16 x i32>
+  %r167 = fptosi <16 x double> poison to <16 x i32>
+  %r168 = fptoui <16 x double> poison to <16 x i64>
+  %r169 = fptosi <16 x double> poison to <16 x i64>
 
-  %r170 = uitofp <2 x i1> undef to <2 x float>
-  %r171 = sitofp <2 x i1> undef to <2 x float>
-  %r172 = uitofp <2 x i8> undef to <2 x float>
-  %r173 = sitofp <2 x i8> undef to <2 x float>
-  %r174 = uitofp <2 x i16> undef to <2 x float>
-  %r175 = sitofp <2 x i16> undef to <2 x float>
-  %r176 = uitofp <2 x i32> undef to <2 x float>
-  %r177 = sitofp <2 x i32> undef to <2 x float>
-  %r178 = uitofp <2 x i64> undef to <2 x float>
-  %r179 = sitofp <2 x i64> undef to <2 x float>
+  %r170 = uitofp <2 x i1> poison to <2 x float>
+  %r171 = sitofp <2 x i1> poison to <2 x float>
+  %r172 = uitofp <2 x i8> poison to <2 x float>
+  %r173 = sitofp <2 x i8> poison to <2 x float>
+  %r174 = uitofp <2 x i16> poison to <2 x float>
+  %r175 = sitofp <2 x i16> poison to <2 x float>
+  %r176 = uitofp <2 x i32> poison to <2 x float>
+  %r177 = sitofp <2 x i32> poison to <2 x float>
+  %r178 = uitofp <2 x i64> poison to <2 x float>
+  %r179 = sitofp <2 x i64> poison to <2 x float>
 
-  %r180 = uitofp <2 x i1> undef to <2 x double>
-  %r181 = sitofp <2 x i1> undef to <2 x double>
-  %r182 = uitofp <2 x i8> undef to <2 x double>
-  %r183 = sitofp <2 x i8> undef to <2 x double>
-  %r184 = uitofp <2 x i16> undef to <2 x double>
-  %r185 = sitofp <2 x i16> undef to <2 x double>
-  %r186 = uitofp <2 x i32> undef to <2 x double>
-  %r187 = sitofp <2 x i32> undef to <2 x double>
-  %r188 = uitofp <2 x i64> undef to <2 x double>
-  %r189 = sitofp <2 x i64> undef to <2 x double>
+  %r180 = uitofp <2 x i1> poison to <2 x double>
+  %r181 = sitofp <2 x i1> poison to <2 x double>
+  %r182 = uitofp <2 x i8> poison to <2 x double>
+  %r183 = sitofp <2 x i8> poison to <2 x double>
+  %r184 = uitofp <2 x i16> poison to <2 x double>
+  %r185 = sitofp <2 x i16> poison to <2 x double>
+  %r186 = uitofp <2 x i32> poison to <2 x double>
+  %r187 = sitofp <2 x i32> poison to <2 x double>
+  %r188 = uitofp <2 x i64> poison to <2 x double>
+  %r189 = sitofp <2 x i64> poison to <2 x double>
 
-  %r190 = uitofp <4 x i1> undef to <4 x float>
-  %r191 = sitofp <4 x i1> undef to <4 x float>
-  %r192 = uitofp <4 x i8> undef to <4 x float>
-  %r193 = sitofp <4 x i8> undef to <4 x float>
-  %r194 = uitofp <4 x i16> undef to <4 x float>
-  %r195 = sitofp <4 x i16> undef to <4 x float>
-  %r196 = uitofp <4 x i32> undef to <4 x float>
-  %r197 = sitofp <4 x i32> undef to <4 x float>
-  %r198 = uitofp <4 x i64> undef to <4 x float>
-  %r199 = sitofp <4 x i64> undef to <4 x float>
+  %r190 = uitofp <4 x i1> poison to <4 x float>
+  %r191 = sitofp <4 x i1> poison to <4 x float>
+  %r192 = uitofp <4 x i8> poison to <4 x float>
+  %r193 = sitofp <4 x i8> poison to <4 x float>
+  %r194 = uitofp <4 x i16> poison to <4 x float>
+  %r195 = sitofp <4 x i16> poison to <4 x float>
+  %r196 = uitofp <4 x i32> poison to <4 x float>
+  %r197 = sitofp <4 x i32> poison to <4 x float>
+  %r198 = uitofp <4 x i64> poison to <4 x float>
+  %r199 = sitofp <4 x i64> poison to <4 x float>
 
-  %r200 = uitofp <4 x i1> undef to <4 x double>
-  %r201 = sitofp <4 x i1> undef to <4 x double>
-  %r202 = uitofp <4 x i8> undef to <4 x double>
-  %r203 = sitofp <4 x i8> undef to <4 x double>
-  %r204 = uitofp <4 x i16> undef to <4 x double>
-  %r205 = sitofp <4 x i16> undef to <4 x double>
-  %r206 = uitofp <4 x i32> undef to <4 x double>
-  %r207 = sitofp <4 x i32> undef to <4 x double>
-  %r208 = uitofp <4 x i64> undef to <4 x double>
-  %r209 = sitofp <4 x i64> undef to <4 x double>
+  %r200 = uitofp <4 x i1> poison to <4 x double>
+  %r201 = sitofp <4 x i1> poison to <4 x double>
+  %r202 = uitofp <4 x i8> poison to <4 x double>
+  %r203 = sitofp <4 x i8> poison to <4 x double>
+  %r204 = uitofp <4 x i16> poison to <4 x double>
+  %r205 = sitofp <4 x i16> poison to <4 x double>
+  %r206 = uitofp <4 x i32> poison to <4 x double>
+  %r207 = sitofp <4 x i32> poison to <4 x double>
+  %r208 = uitofp <4 x i64> poison to <4 x double>
+  %r209 = sitofp <4 x i64> poison to <4 x double>
 
-  %r210 = uitofp <8 x i1> undef to <8 x float>
-  %r211 = sitofp <8 x i1> undef to <8 x float>
-  %r212 = uitofp <8 x i8> undef to <8 x float>
-  %r213 = sitofp <8 x i8> undef to <8 x float>
-  %r214 = uitofp <8 x i16> undef to <8 x float>
-  %r215 = sitofp <8 x i16> undef to <8 x float>
-  %r216 = uitofp <8 x i32> undef to <8 x float>
-  %r217 = sitofp <8 x i32> undef to <8 x float>
-  %r218 = uitofp <8 x i64> undef to <8 x float>
-  %r219 = sitofp <8 x i64> undef to <8 x float>
+  %r210 = uitofp <8 x i1> poison to <8 x float>
+  %r211 = sitofp <8 x i1> poison to <8 x float>
+  %r212 = uitofp <8 x i8> poison to <8 x float>
+  %r213 = sitofp <8 x i8> poison to <8 x float>
+  %r214 = uitofp <8 x i16> poison to <8 x float>
+  %r215 = sitofp <8 x i16> poison to <8 x float>
+  %r216 = uitofp <8 x i32> poison to <8 x float>
+  %r217 = sitofp <8 x i32> poison to <8 x float>
+  %r218 = uitofp <8 x i64> poison to <8 x float>
+  %r219 = sitofp <8 x i64> poison to <8 x float>
 
-  %r220 = uitofp <8 x i1> undef to <8 x double>
-  %r221 = sitofp <8 x i1> undef to <8 x double>
-  %r222 = uitofp <8 x i8> undef to <8 x double>
-  %r223 = sitofp <8 x i8> undef to <8 x double>
-  %r224 = uitofp <8 x i16> undef to <8 x double>
-  %r225 = sitofp <8 x i16> undef to <8 x double>
-  %r226 = uitofp <8 x i32> undef to <8 x double>
-  %r227 = sitofp <8 x i32> undef to <8 x double>
-  %r228 = uitofp <8 x i64> undef to <8 x double>
-  %r229 = sitofp <8 x i64> undef to <8 x double>
+  %r220 = uitofp <8 x i1> poison to <8 x double>
+  %r221 = sitofp <8 x i1> poison to <8 x double>
+  %r222 = uitofp <8 x i8> poison to <8 x double>
+  %r223 = sitofp <8 x i8> poison to <8 x double>
+  %r224 = uitofp <8 x i16> poison to <8 x double>
+  %r225 = sitofp <8 x i16> poison to <8 x double>
+  %r226 = uitofp <8 x i32> poison to <8 x double>
+  %r227 = sitofp <8 x i32> poison to <8 x double>
+  %r228 = uitofp <8 x i64> poison to <8 x double>
+  %r229 = sitofp <8 x i64> poison to <8 x double>
 
-  %r230 = uitofp <16 x i1> undef to <16 x float>
-  %r231 = sitofp <16 x i1> undef to <16 x float>
-  %r232 = uitofp <16 x i8> undef to <16 x float>
-  %r233 = sitofp <16 x i8> undef to <16 x float>
-  %r234 = uitofp <16 x i16> undef to <16 x float>
-  %r235 = sitofp <16 x i16> undef to <16 x float>
-  %r236 = uitofp <16 x i32> undef to <16 x float>
-  %r237 = sitofp <16 x i32> undef to <16 x float>
-  %r238 = uitofp <16 x i64> undef to <16 x float>
-  %r239 = sitofp <16 x i64> undef to <16 x float>
+  %r230 = uitofp <16 x i1> poison to <16 x float>
+  %r231 = sitofp <16 x i1> poison to <16 x float>
+  %r232 = uitofp <16 x i8> poison to <16 x float>
+  %r233 = sitofp <16 x i8> poison to <16 x float>
+  %r234 = uitofp <16 x i16> poison to <16 x float>
+  %r235 = sitofp <16 x i16> poison to <16 x float>
+  %r236 = uitofp <16 x i32> poison to <16 x float>
+  %r237 = sitofp <16 x i32> poison to <16 x float>
+  %r238 = uitofp <16 x i64> poison to <16 x float>
+  %r239 = sitofp <16 x i64> poison to <16 x float>
 
-  %r240 = uitofp <16 x i1> undef to <16 x double>
-  %r241 = sitofp <16 x i1> undef to <16 x double>
-  %r242 = uitofp <16 x i8> undef to <16 x double>
-  %r243 = sitofp <16 x i8> undef to <16 x double>
-  %r244 = uitofp <16 x i16> undef to <16 x double>
-  %r245 = sitofp <16 x i16> undef to <16 x double>
-  %r246 = uitofp <16 x i32> undef to <16 x double>
-  %r247 = sitofp <16 x i32> undef to <16 x double>
-  %r248 = uitofp <16 x i64> undef to <16 x double>
-  %r249 = sitofp <16 x i64> undef to <16 x double>
+  %r240 = uitofp <16 x i1> poison to <16 x double>
+  %r241 = sitofp <16 x i1> poison to <16 x double>
+  %r242 = uitofp <16 x i8> poison to <16 x double>
+  %r243 = sitofp <16 x i8> poison to <16 x double>
+  %r244 = uitofp <16 x i16> poison to <16 x double>
+  %r245 = sitofp <16 x i16> poison to <16 x double>
+  %r246 = uitofp <16 x i32> poison to <16 x double>
+  %r247 = sitofp <16 x i32> poison to <16 x double>
+  %r248 = uitofp <16 x i64> poison to <16 x double>
+  %r249 = sitofp <16 x i64> poison to <16 x double>
 
   ret i32 undef
 }
@@ -1760,24 +1760,24 @@ define i32 @casts_with_users(i8 %a, i16 %b, i32 %c, i64 %d, i1 %e) {
 
 define i32 @bitcasts() {
 ; CHECK-LABEL: 'bitcasts'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %a = bitcast i32 undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %b = bitcast float undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c = bitcast i32 undef to float
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %d = bitcast float undef to i32
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %e = bitcast i64 undef to double
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f = bitcast double undef to i64
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %g = bitcast half undef to i16
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %h = bitcast i16 undef to half
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %a = bitcast i32 poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %b = bitcast float poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c = bitcast i32 poison to float
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %d = bitcast float poison to i32
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %e = bitcast i64 poison to double
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f = bitcast double poison to i64
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %g = bitcast half poison to i16
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %h = bitcast i16 poison to half
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-  %a = bitcast i32 undef to i32
-  %b = bitcast float undef to float
-  %c = bitcast i32 undef to float
-  %d = bitcast float undef to i32
-  %e = bitcast i64 undef to double
-  %f = bitcast double undef to i64
-  %g = bitcast half undef to i16
-  %h = bitcast i16 undef to half
+  %a = bitcast i32 poison to i32
+  %b = bitcast float poison to float
+  %c = bitcast i32 poison to float
+  %d = bitcast float poison to i32
+  %e = bitcast i64 poison to double
+  %f = bitcast double poison to i64
+  %g = bitcast half poison to i16
+  %h = bitcast i16 poison to half
   ret i32 undef
 }
 
@@ -2012,31 +2012,31 @@ define i32 @load_extends() #0 {
 
 define i32 @store_truncs() {
 ; CHECK-LABEL: 'store_truncs'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r0 = trunc i64 undef to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r0 = trunc i64 poison to i8
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i8 %r0, ptr undef, align 1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r1 = trunc i64 undef to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r1 = trunc i64 poison to i16
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i16 %r1, ptr undef, align 2
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r2 = trunc i64 undef to i32
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r2 = trunc i64 poison to i32
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i32 %r2, ptr undef, align 4
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r3 = trunc i32 undef to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r3 = trunc i32 poison to i8
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i8 %r3, ptr undef, align 1
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r4 = trunc i32 undef to i16
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r4 = trunc i32 poison to i16
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i16 %r4, ptr undef, align 2
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r5 = trunc i16 undef to i8
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %r5 = trunc i16 poison to i8
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: store i8 %r5, ptr undef, align 1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
-  %r0 = trunc i64 undef to i8
+  %r0 = trunc i64 poison to i8
   store i8 %r0, ptr undef
-  %r1 = trunc i64 undef to i16
+  %r1 = trunc i64 poison to i16
   store i16 %r1, ptr undef
-  %r2 = trunc i64 undef to i32
+  %r2 = trunc i64 poison to i32
   store i32 %r2, ptr undef
-  %r3 = trunc i32 undef to i8
+  %r3 = trunc i32 poison to i8
   store i8 %r3, ptr undef
-  %r4 = trunc i32 undef to i16
+  %r4 = trunc i32 poison to i16
   store i16 %r4, ptr undef
-  %r5 = trunc i16 undef to i8
+  %r5 = trunc i16 poison to i8
   store i8 %r5, ptr undef
   ret i32 undef
 }
@@ -2084,372 +2084,372 @@ declare void @use(i16, i16, i32, i32, i64, i64, i32, i32, i64, i64, i64, i64)
 
 define void @fp16cast() {
 ; CHECK-SVE-LABEL: 'fp16cast'
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half undef to i1
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half undef to i8
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half undef to i16
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half undef to i32
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half undef to i64
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> undef to <2 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> undef to <2 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> undef to <2 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> undef to <2 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> undef to <2 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> undef to <2 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> undef to <4 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> undef to <4 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> undef to <4 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> undef to <4 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> undef to <4 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> undef to <4 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> undef to <8 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> undef to <8 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> undef to <8 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> undef to <8 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> undef to <8 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> undef to <8 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> undef to <16 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> undef to <16 x i1>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> undef to <16 x i8>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> undef to <16 x i16>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> undef to <16 x i32>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> undef to <16 x i64>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> undef to <8 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> undef to <16 x half>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> undef to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half poison to i1
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half poison to i8
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half poison to i16
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half poison to i32
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half poison to i64
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> poison to <2 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> poison to <2 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> poison to <2 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:21 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:81 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:86 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half>
 ; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; SVE128-NO-NEON-LABEL: 'fp16cast'
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half undef to i1
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half undef to i8
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half undef to i16
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half undef to i32
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half undef to i64
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> undef to <2 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> undef to <2 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> undef to <2 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> undef to <2 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> undef to <2 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r98 = fptoui <2 x half> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r99 = fptosi <2 x half> undef to <2 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> undef to <4 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> undef to <4 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> undef to <4 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> undef to <4 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x half> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x half> undef to <4 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> undef to <4 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r130 = fptoui <8 x half> undef to <8 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r131 = fptosi <8 x half> undef to <8 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r132 = fptoui <8 x half> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r133 = fptosi <8 x half> undef to <8 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> undef to <8 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> undef to <8 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> undef to <8 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> undef to <16 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> undef to <16 x i1>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> undef to <16 x i8>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> undef to <16 x i16>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> undef to <16 x i32>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> undef to <16 x i64>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> undef to <8 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> undef to <16 x half>
-; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> undef to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half poison to i1
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half poison to i8
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half poison to i16
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half poison to i32
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half poison to i64
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> poison to <2 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> poison to <2 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> poison to <2 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r98 = fptoui <2 x half> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r99 = fptosi <2 x half> poison to <2 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r116 = fptoui <4 x half> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r117 = fptosi <4 x half> poison to <4 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r118 = fptoui <4 x half> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r119 = fptosi <4 x half> poison to <4 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r130 = fptoui <8 x half> poison to <8 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r131 = fptosi <8 x half> poison to <8 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r132 = fptoui <8 x half> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r133 = fptosi <8 x half> poison to <8 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r136 = fptoui <8 x half> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r137 = fptosi <8 x half> poison to <8 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r150 = fptoui <16 x half> poison to <16 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r151 = fptosi <16 x half> poison to <16 x i1>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r152 = fptoui <16 x half> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r153 = fptosi <16 x half> poison to <16 x i8>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r154 = fptoui <16 x half> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r155 = fptosi <16 x half> poison to <16 x i16>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r256 = uitofp <8 x i32> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r257 = sitofp <8 x i32> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r260 = uitofp <16 x i1> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r261 = sitofp <16 x i1> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r264 = uitofp <16 x i16> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r265 = sitofp <16 x i16> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half>
+; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half>
 ; SVE128-NO-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; FIXED-MIN-256-LABEL: 'fp16cast'
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half undef to i1
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half undef to i8
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half undef to i16
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half undef to i32
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half undef to i64
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> undef to <2 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> undef to <2 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> undef to <2 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> undef to <2 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> undef to <2 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> undef to <2 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> undef to <4 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> undef to <4 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> undef to <4 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> undef to <4 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> undef to <4 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x half> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x half> undef to <4 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> undef to <8 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> undef to <8 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> undef to <8 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> undef to <8 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x half> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x half> undef to <8 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> undef to <8 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r150 = fptoui <16 x half> undef to <16 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r151 = fptosi <16 x half> undef to <16 x i1>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r152 = fptoui <16 x half> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r153 = fptosi <16 x half> undef to <16 x i8>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r154 = fptoui <16 x half> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r155 = fptosi <16 x half> undef to <16 x i16>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> undef to <16 x i32>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> undef to <16 x i64>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r256 = uitofp <8 x i32> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r257 = sitofp <8 x i32> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> undef to <8 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r260 = uitofp <16 x i1> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r261 = sitofp <16 x i1> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r264 = uitofp <16 x i16> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r265 = sitofp <16 x i16> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> undef to <16 x half>
-; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> undef to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half poison to i1
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half poison to i8
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half poison to i16
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half poison to i32
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half poison to i64
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> poison to <2 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> poison to <2 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> poison to <2 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x half> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x half> poison to <4 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x half> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x half> poison to <8 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r138 = fptoui <8 x half> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r139 = fptosi <8 x half> poison to <8 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r150 = fptoui <16 x half> poison to <16 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r151 = fptosi <16 x half> poison to <16 x i1>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r152 = fptoui <16 x half> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r153 = fptosi <16 x half> poison to <16 x i8>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r154 = fptoui <16 x half> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r155 = fptosi <16 x half> poison to <16 x i16>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r156 = fptoui <16 x half> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r157 = fptosi <16 x half> poison to <16 x i32>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r158 = fptoui <16 x half> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r159 = fptosi <16 x half> poison to <16 x i64>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r256 = uitofp <8 x i32> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r257 = sitofp <8 x i32> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r258 = uitofp <8 x i64> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r259 = sitofp <8 x i64> poison to <8 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r260 = uitofp <16 x i1> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r261 = sitofp <16 x i1> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r264 = uitofp <16 x i16> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of 1 for: %r265 = sitofp <16 x i16> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r266 = uitofp <16 x i32> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r267 = sitofp <16 x i32> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r268 = uitofp <16 x i64> poison to <16 x half>
+; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r269 = sitofp <16 x i64> poison to <16 x half>
 ; FIXED-MIN-256-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; FIXED-MIN-2048-LABEL: 'fp16cast'
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half undef to i1
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half undef to i8
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half undef to i16
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half undef to i32
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half undef to i64
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> undef to <2 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> undef to <2 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> undef to <2 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> undef to <2 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> undef to <2 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> undef to <2 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> undef to <4 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> undef to <4 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> undef to <4 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> undef to <4 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> undef to <4 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x half> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x half> undef to <4 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> undef to <8 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> undef to <8 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> undef to <8 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> undef to <8 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x half> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x half> undef to <8 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r138 = fptoui <8 x half> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r139 = fptosi <8 x half> undef to <8 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r150 = fptoui <16 x half> undef to <16 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r151 = fptosi <16 x half> undef to <16 x i1>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r152 = fptoui <16 x half> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r153 = fptosi <16 x half> undef to <16 x i8>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r154 = fptoui <16 x half> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r155 = fptosi <16 x half> undef to <16 x i16>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r156 = fptoui <16 x half> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r157 = fptosi <16 x half> undef to <16 x i32>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r158 = fptoui <16 x half> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r159 = fptosi <16 x half> undef to <16 x i64>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r256 = uitofp <8 x i32> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r257 = sitofp <8 x i32> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r258 = uitofp <8 x i64> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r259 = sitofp <8 x i64> undef to <8 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r260 = uitofp <16 x i1> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r261 = sitofp <16 x i1> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r264 = uitofp <16 x i16> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r265 = sitofp <16 x i16> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r266 = uitofp <16 x i32> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r267 = sitofp <16 x i32> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r268 = uitofp <16 x i64> undef to <16 x half>
-; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r269 = sitofp <16 x i64> undef to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r30 = fptoui half poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r31 = fptosi half poison to i1
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r32 = fptoui half poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r33 = fptosi half poison to i8
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r34 = fptoui half poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r35 = fptosi half poison to i16
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r36 = fptoui half poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r37 = fptosi half poison to i32
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r38 = fptoui half poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r39 = fptosi half poison to i64
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r90 = fptoui <2 x half> poison to <2 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r91 = fptosi <2 x half> poison to <2 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r92 = fptoui <2 x half> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r93 = fptosi <2 x half> poison to <2 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r94 = fptoui <2 x half> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r95 = fptosi <2 x half> poison to <2 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r96 = fptoui <2 x half> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r97 = fptosi <2 x half> poison to <2 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r98 = fptoui <2 x half> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r99 = fptosi <2 x half> poison to <2 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r110 = fptoui <4 x half> poison to <4 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r111 = fptosi <4 x half> poison to <4 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r112 = fptoui <4 x half> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r113 = fptosi <4 x half> poison to <4 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r114 = fptoui <4 x half> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r115 = fptosi <4 x half> poison to <4 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r116 = fptoui <4 x half> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r117 = fptosi <4 x half> poison to <4 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r118 = fptoui <4 x half> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r119 = fptosi <4 x half> poison to <4 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r130 = fptoui <8 x half> poison to <8 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:1 Lat:1 SizeLat:1 for: %r131 = fptosi <8 x half> poison to <8 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r132 = fptoui <8 x half> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r133 = fptosi <8 x half> poison to <8 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r134 = fptoui <8 x half> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r135 = fptosi <8 x half> poison to <8 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r136 = fptoui <8 x half> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r137 = fptosi <8 x half> poison to <8 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r138 = fptoui <8 x half> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r139 = fptosi <8 x half> poison to <8 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r150 = fptoui <16 x half> poison to <16 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r151 = fptosi <16 x half> poison to <16 x i1>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r152 = fptoui <16 x half> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r153 = fptosi <16 x half> poison to <16 x i8>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r154 = fptoui <16 x half> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r155 = fptosi <16 x half> poison to <16 x i16>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r156 = fptoui <16 x half> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r157 = fptosi <16 x half> poison to <16 x i32>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r158 = fptoui <16 x half> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r159 = fptosi <16 x half> poison to <16 x i64>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r250 = uitofp <8 x i1> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r251 = sitofp <8 x i1> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r252 = uitofp <8 x i8> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r253 = sitofp <8 x i8> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r254 = uitofp <8 x i16> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r255 = sitofp <8 x i16> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r256 = uitofp <8 x i32> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r257 = sitofp <8 x i32> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r258 = uitofp <8 x i64> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r259 = sitofp <8 x i64> poison to <8 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r260 = uitofp <16 x i1> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r261 = sitofp <16 x i1> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r262 = uitofp <16 x i8> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r263 = sitofp <16 x i8> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r264 = uitofp <16 x i16> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r265 = sitofp <16 x i16> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r266 = uitofp <16 x i32> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r267 = sitofp <16 x i32> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r268 = uitofp <16 x i64> poison to <16 x half>
+; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of 1 for: %r269 = sitofp <16 x i64> poison to <16 x half>
 ; FIXED-MIN-2048-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %r30 = fptoui half undef to i1
-  %r31 = fptosi half undef to i1
-  %r32 = fptoui half undef to i8
-  %r33 = fptosi half undef to i8
-  %r34 = fptoui half undef to i16
-  %r35 = fptosi half undef to i16
-  %r36 = fptoui half undef to i32
-  %r37 = fptosi half undef to i32
-  %r38 = fptoui half undef to i64
-  %r39 = fptosi half undef to i64
+  %r30 = fptoui half poison to i1
+  %r31 = fptosi half poison to i1
+  %r32 = fptoui half poison to i8
+  %r33 = fptosi half poison to i8
+  %r34 = fptoui half poison to i16
+  %r35 = fptosi half poison to i16
+  %r36 = fptoui half poison to i32
+  %r37 = fptosi half poison to i32
+  %r38 = fptoui half poison to i64
+  %r39 = fptosi half poison to i64
 
-  %r90 = fptoui <2 x half> undef to <2 x i1>
-  %r91 = fptosi <2 x half> undef to <2 x i1>
-  %r92 = fptoui <2 x half> undef to <2 x i8>
-  %r93 = fptosi <2 x half> undef to <2 x i8>
-  %r94 = fptoui <2 x half> undef to <2 x i16>
-  %r95 = fptosi <2 x half> undef to <2 x i16>
-  %r96 = fptoui <2 x half> undef to <2 x i32>
-  %r97 = fptosi <2 x half> undef to <2 x i32>
-  %r98 = fptoui <2 x half> undef to <2 x i64>
-  %r99 = fptosi <2 x half> undef to <2 x i64>
+  %r90 = fptoui <2 x half> poison to <2 x i1>
+  %r91 = fptosi <2 x half> poison to <2 x i1>
+  %r92 = fptoui <2 x half> poison to <2 x i8>
+  %r93 = fptosi <2 x half> poison to <2 x i8>
+  %r94 = fptoui <2 x half> poison to <2 x i16>
+  %r95 = fptosi <2 x half> poison to <2 x i16>
+  %r96 = fptoui <2 x half> poison to <2 x i32>
+  %r97 = fptosi <2 x half> poison to <2 x i32>
+  %r98 = fptoui <2 x half> poison to <2 x i64>
+  %r99 = fptosi <2 x half> poison to <2 x i64>
 
-  %r110 = fptoui <4 x half> undef to <4 x i1>
-  %r111 = fptosi <4 x half> undef to <4 x i1>
-  %r112 = fptoui <4 x half> undef to <4 x i8>
-  %r113 = fptosi <4 x half> undef to <4 x i8>
-  %r114 = fptoui <4 x half> undef to <4 x i16>
-  %r115 = fptosi <4 x half> undef to <4 x i16>
-  %r116 = fptoui <4 x half> undef to <4 x i32>
-  %r117 = fptosi <4 x half> undef to <4 x i32>
-  %r118 = fptoui <4 x half> undef to <4 x i64>
-  %r119 = fptosi <4 x half> undef to <4 x i64>
+  %r110 = fptoui <4 x half> poison to <4 x i1>
+  %r111 = fptosi <4 x half> poison to <4 x i1>
+  %r112 = fptoui <4 x half> poison to <4 x i8>
+  %r113 = fptosi <4 x half> poison to <4 x i8>
+  %r114 = fptoui <4 x half> poison to <4 x i16>
+  %r115 = fptosi <4 x half> poison to <4 x i16>
+  %r116 = fptoui <4 x half> poison to <4 x i32>
+  %r117 = fptosi <4 x half> poison to <4 x i32>
+  %r118 = fptoui <4 x half> poison to <4 x i64>
+  %r119 = fptosi <4 x half> poison to <4 x i64>
 
-  %r130 = fptoui <8 x half> undef to <8 x i1>
-  %r131 = fptosi <8 x half> undef to <8 x i1>
-  %r132 = fptoui <8 x half> undef to <8 x i8>
-  %r133 = fptosi <8 x half> undef to <8 x i8>
-  %r134 = fptoui <8 x half> undef to <8 x i16>
-  %r135 = fptosi <8 x half> undef to <8 x i16>
-  %r136 = fptoui <8 x half> undef to <8 x i32>
-  %r137 = fptosi <8 x half> undef to <8 x i32>
-  %r138 = fptoui <8 x half> undef to <8 x i64>
-  %r139 = fptosi <8 x half> undef to <8 x i64>
+  %r130 = fptoui <8 x half> poison to <8 x i1>
+  %r131 = fptosi <8 x half> poison to <8 x i1>
+  %r132 = fptoui <8 x half> poison to <8 x i8>
+  %r133 = fptosi <8 x half> poison to <8 x i8>
+  %r134 = fptoui <8 x half> poison to <8 x i16>
+  %r135 = fptosi <8 x half> poison to <8 x i16>
+  %r136 = fptoui <8 x half> poison to <8 x i32>
+  %r137 = fptosi <8 x half> poison to <8 x i32>
+  %r138 = fptoui <8 x half> poison to <8 x i64>
+  %r139 = fptosi <8 x half> poison to <8 x i64>
 
-  %r150 = fptoui <16 x half> undef to <16 x i1>
-  %r151 = fptosi <16 x half> undef to <16 x i1>
-  %r152 = fptoui <16 x half> undef to <16 x i8>
-  %r153 = fptosi <16 x half> undef to <16 x i8>
-  %r154 = fptoui <16 x half> undef to <16 x i16>
-  %r155 = fptosi <16 x half> undef to <16 x i16>
-  %r156 = fptoui <16 x half> undef to <16 x i32>
-  %r157 = fptosi <16 x half> undef to <16 x i32>
-  %r158 = fptoui <16 x half> undef to <16 x i64>
-  %r159 = fptosi <16 x half> undef to <16 x i64>
+  %r150 = fptoui <16 x half> poison to <16 x i1>
+  %r151 = fptosi <16 x half> poison to <16 x i1>
+  %r152 = fptoui <16 x half> poison to <16 x i8>
+  %r153 = fptosi <16 x half> poison to <16 x i8>
+  %r154 = fptoui <16 x half> poison to <16 x i16>
+  %r155 = fptosi <16 x half> poison to <16 x i16>
+  %r156 = fptoui <16 x half> poison to <16 x i32>
+  %r157 = fptosi <16 x half> poison to <16 x i32>
+  %r158 = fptoui <16 x half> poison to <16 x i64>
+  %r159 = fptosi <16 x half> poison to <16 x i64>
 
-  %r250 = uitofp <8 x i1> undef to <8 x half>
-  %r251 = sitofp <8 x i1> undef to <8 x half>
-  %r252 = uitofp <8 x i8> undef to <8 x half>
-  %r253 = sitofp <8 x i8> undef to <8 x half>
-  %r254 = uitofp <8 x i16> undef to <8 x half>
-  %r255 = sitofp <8 x i16> undef to <8 x half>
-  %r256 = uitofp <8 x i32> undef to <8 x half>
-  %r257 = sitofp <8 x i32> undef to <8 x half>
-  %r258 = uitofp <8 x i64> undef to <8 x half>
-  %r259 = sitofp <8 x i64> undef to <8 x half>
+  %r250 = uitofp <8 x i1> poison to <8 x half>
+  %r251 = sitofp <8 x i1> poison to <8 x half>
+  %r252 = uitofp <8 x i8> poison to <8 x half>
+  %r253 = sitofp <8 x i8> poison to <8 x half>
+  %r254 = uitofp <8 x i16> poison to <8 x half>
+  %r255 = sitofp <8 x i16> poison to <8 x half>
+  %r256 = uitofp <8 x i32> poison to <8 x half>
+  %r257 = sitofp <8 x i32> poison to <8 x half>
+  %r258 = uitofp <8 x i64> poison to <8 x half>
+  %r259 = sitofp <8 x i64> poison to <8 x half>
 
-  %r260 = uitofp <16 x i1> undef to <16 x half>
-  %r261 = sitofp <16 x i1> undef to <16 x half>
-  %r262 = uitofp <16 x i8> undef to <16 x half>
-  %r263 = sitofp <16 x i8> undef to <16 x half>
-  %r264 = uitofp <16 x i16> undef to <16 x half>
-  %r265 = sitofp <16 x i16> undef to <16 x half>
-  %r266 = uitofp <16 x i32> undef to <16 x half>
-  %r267 = sitofp <16 x i32> undef to <16 x half>
-  %r268 = uitofp <16 x i64> undef to <16 x half>
-  %r269 = sitofp <16 x i64> undef to <16 x half>
+  %r260 = uitofp <16 x i1> poison to <16 x half>
+  %r261 = sitofp <16 x i1> poison to <16 x half>
+  %r262 = uitofp <16 x i8> poison to <16 x half>
+  %r263 = sitofp <16 x i8> poison to <16 x half>
+  %r264 = uitofp <16 x i16> poison to <16 x half>
+  %r265 = sitofp <16 x i16> poison to <16 x half>
+  %r266 = uitofp <16 x i32> poison to <16 x half>
+  %r267 = sitofp <16 x i32> poison to <16 x half>
+  %r268 = uitofp <16 x i64> poison to <16 x half>
+  %r269 = sitofp <16 x i64> poison to <16 x half>
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
index b887654..91aaea2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-ext.ll
@@ -5,49 +5,49 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @sve_ext() {
 ; CHECK-LABEL: 'sve_ext'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i16_to_i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i16_to_i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i32_to_i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i8_to_i64 = zext <vscale x 4 x i8> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i8_to_i32 = zext <vscale x 8 x i8> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i16_to_i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i8_to_i64 = zext <vscale x 8 x i8> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i16_to_i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i16_to_i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i32_to_i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i8_to_i64 = sext <vscale x 4 x i8> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i8_to_i32 = sext <vscale x 8 x i8> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i16_to_i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i8_to_i64 = sext <vscale x 8 x i8> undef to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> poison to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> poison to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> poison to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i16_to_i32 = zext <vscale x 8 x i16> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i16_to_i64 = zext <vscale x 8 x i16> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i32_to_i64 = zext <vscale x 4 x i32> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i8_to_i64 = zext <vscale x 4 x i8> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i8_to_i32 = zext <vscale x 8 x i8> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv4_i16_to_i64 = zext <vscale x 4 x i16> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %zext_nxv8_i8_to_i64 = zext <vscale x 8 x i8> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> poison to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> poison to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> poison to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i16_to_i32 = sext <vscale x 8 x i16> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i16_to_i64 = sext <vscale x 8 x i16> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i32_to_i64 = sext <vscale x 4 x i32> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i8_to_i64 = sext <vscale x 4 x i8> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i8_to_i32 = sext <vscale x 8 x i8> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv4_i16_to_i64 = sext <vscale x 4 x i16> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %sext_nxv8_i8_to_i64 = sext <vscale x 8 x i8> poison to <vscale x 8 x i64>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-  %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-  %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-  %zext_nxv8_i16_to_i32 = zext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-  %zext_nxv8_i16_to_i64 = zext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-  %zext_nxv4_i32_to_i64 = zext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-  %zext_nxv4_i8_to_i64  = zext <vscale x 4 x i8>  undef to <vscale x 4 x i64>
-  %zext_nxv8_i8_to_i32  = zext <vscale x 8 x i8>  undef to <vscale x 8 x i32>
-  %zext_nxv4_i16_to_i64 = zext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-  %zext_nxv8_i8_to_i64  = zext <vscale x 8 x i8>  undef to <vscale x 8 x i64>
+  %zext_nxv16_i8_to_i16 = zext <vscale x 16 x i8> poison to <vscale x 16 x i16>
+  %zext_nxv16_i8_to_i32 = zext <vscale x 16 x i8> poison to <vscale x 16 x i32>
+  %zext_nxv16_i8_to_i64 = zext <vscale x 16 x i8> poison to <vscale x 16 x i64>
+  %zext_nxv8_i16_to_i32 = zext <vscale x 8 x i16> poison to <vscale x 8 x i32>
+  %zext_nxv8_i16_to_i64 = zext <vscale x 8 x i16> poison to <vscale x 8 x i64>
+  %zext_nxv4_i32_to_i64 = zext <vscale x 4 x i32> poison to <vscale x 4 x i64>
+  %zext_nxv4_i8_to_i64  = zext <vscale x 4 x i8>  poison to <vscale x 4 x i64>
+  %zext_nxv8_i8_to_i32  = zext <vscale x 8 x i8>  poison to <vscale x 8 x i32>
+  %zext_nxv4_i16_to_i64 = zext <vscale x 4 x i16> poison to <vscale x 4 x i64>
+  %zext_nxv8_i8_to_i64  = zext <vscale x 8 x i8>  poison to <vscale x 8 x i64>
 
-  %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> undef to <vscale x 16 x i16>
-  %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> undef to <vscale x 16 x i32>
-  %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> undef to <vscale x 16 x i64>
-  %sext_nxv8_i16_to_i32 = sext <vscale x 8 x i16> undef to <vscale x 8 x i32>
-  %sext_nxv8_i16_to_i64 = sext <vscale x 8 x i16> undef to <vscale x 8 x i64>
-  %sext_nxv4_i32_to_i64 = sext <vscale x 4 x i32> undef to <vscale x 4 x i64>
-  %sext_nxv4_i8_to_i64  = sext <vscale x 4 x i8>  undef to <vscale x 4 x i64>
-  %sext_nxv8_i8_to_i32  = sext <vscale x 8 x i8>  undef to <vscale x 8 x i32>
-  %sext_nxv4_i16_to_i64 = sext <vscale x 4 x i16> undef to <vscale x 4 x i64>
-  %sext_nxv8_i8_to_i64  = sext <vscale x 8 x i8>  undef to <vscale x 8 x i64>
+  %sext_nxv16_i8_to_i16 = sext <vscale x 16 x i8> poison to <vscale x 16 x i16>
+  %sext_nxv16_i8_to_i32 = sext <vscale x 16 x i8> poison to <vscale x 16 x i32>
+  %sext_nxv16_i8_to_i64 = sext <vscale x 16 x i8> poison to <vscale x 16 x i64>
+  %sext_nxv8_i16_to_i32 = sext <vscale x 8 x i16> poison to <vscale x 8 x i32>
+  %sext_nxv8_i16_to_i64 = sext <vscale x 8 x i16> poison to <vscale x 8 x i64>
+  %sext_nxv4_i32_to_i64 = sext <vscale x 4 x i32> poison to <vscale x 4 x i64>
+  %sext_nxv4_i8_to_i64  = sext <vscale x 4 x i8>  poison to <vscale x 4 x i64>
+  %sext_nxv8_i8_to_i32  = sext <vscale x 8 x i8>  poison to <vscale x 8 x i32>
+  %sext_nxv4_i16_to_i64 = sext <vscale x 4 x i16> poison to <vscale x 4 x i64>
+  %sext_nxv8_i8_to_i64  = sext <vscale x 8 x i8>  poison to <vscale x 8 x i64>
 
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
index 4ad0e3f..1e698b1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
@@ -6,49 +6,49 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @sve_fpext() {
 ; CHECK-LABEL: 'sve_fpext'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x half> poison to <vscale x 2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x half> poison to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> poison to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x half> poison to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> poison to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> poison to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f32_to_f64 = fpext <vscale x 2 x float> poison to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> poison to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> poison to <vscale x 8 x double>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
-  %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
-  %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
+  %nxv2_f16_to_f32 = fpext <vscale x 2 x half> poison to <vscale x 2 x float>
+  %nxv4_f16_to_f32 = fpext <vscale x 4 x half> poison to <vscale x 4 x float>
+  %nxv8_f16_to_f32 = fpext <vscale x 8 x half> poison to <vscale x 8 x float>
 
-  %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
-  %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
-  %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
+  %nxv2_f16_to_f64 = fpext <vscale x 2 x half> poison to <vscale x 2 x double>
+  %nxv4_f16_to_f64 = fpext <vscale x 4 x half> poison to <vscale x 4 x double>
+  %nxv8_f16_to_f64 = fpext <vscale x 8 x half> poison to <vscale x 8 x double>
 
-  %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
-  %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
-  %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
+  %nxv2_f32_to_f64 = fpext <vscale x 2 x float> poison to <vscale x 2 x double>
+  %nxv4_f32_to_f64 = fpext <vscale x 4 x float> poison to <vscale x 4 x double>
+  %nxv8_f32_to_f64 = fpext <vscale x 8 x float> poison to <vscale x 8 x double>
 
   ret void
 }
 
 define void @sve_fpext_bf16() {
 ; CHECK-LABEL: 'sve_fpext_bf16'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x double>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x double>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
-  %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
-  %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
+  %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x float>
+  %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x float>
+  %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x float>
 
-  %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
-  %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
-  %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
+  %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> poison to <vscale x 2 x double>
+  %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> poison to <vscale x 4 x double>
+  %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> poison to <vscale x 8 x double>
 
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
index 06ed58d..ce624a1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll
@@ -6,163 +6,163 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @sve-fptoi() {
 ; CHECK-LABEL: 'sve-fptoi'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_si8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_ui8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_si16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_ui16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_si32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_ui32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_si8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_ui8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_si32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_ui32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_si64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_ui64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_si16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_ui16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_si16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_ui16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_si8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_ui8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_si32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_ui32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f16_to_si64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f16_to_ui64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_si16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_ui16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv8f16_to_si8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv8f16_to_ui8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_si32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_ui32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_si64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_ui64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si8 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui8 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si32 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui32 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_si64 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f16_to_ui64 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si8 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui8 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si16 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui16 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_si64 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nv1f32_to_ui64 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_si8 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_ui8 = fptoui <vscale x 1 x double> poison to <vscale x 1 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_si16 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_ui16 = fptoui <vscale x 1 x double> poison to <vscale x 1 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_si32 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv1f64_to_ui32 = fptoui <vscale x 1 x double> poison to <vscale x 1 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_si8 = fptosi <vscale x 2 x half> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_ui8 = fptoui <vscale x 2 x half> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_si32 = fptosi <vscale x 2 x half> poison to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_ui32 = fptoui <vscale x 2 x half> poison to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_si64 = fptosi <vscale x 2 x half> poison to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f16_to_ui64 = fptoui <vscale x 2 x half> poison to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_si8 = fptosi <vscale x 2 x float> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_si16 = fptosi <vscale x 2 x float> poison to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_ui16 = fptoui <vscale x 2 x float> poison to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_si64 = fptosi <vscale x 2 x float> poison to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> poison to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_si8 = fptosi <vscale x 2 x double> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_si16 = fptosi <vscale x 2 x double> poison to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_ui16 = fptoui <vscale x 2 x double> poison to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_si32 = fptosi <vscale x 2 x double> poison to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> poison to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_si8 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_ui8 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_si32 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f16_to_ui32 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f16_to_si64 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f16_to_ui64 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_si8 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_si16 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv4f32_to_ui16 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f32_to_si64 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si8 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si16 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui16 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_si32 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv8f16_to_si8 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nv8f16_to_ui8 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_si32 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_ui32 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_si64 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f16_to_ui64 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si8 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si16 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui16 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_si64 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si8 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si16 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui16 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_si32 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i32>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %nv1f16_to_si8  = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
-  %nv1f16_to_ui8  = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
-  %nv1f16_to_si32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
-  %nv1f16_to_ui32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
-  %nv1f16_to_si64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
-  %nv1f16_to_ui64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
+  %nv1f16_to_si8  = fptosi <vscale x 1 x half> poison to <vscale x 1 x i8>
+  %nv1f16_to_ui8  = fptoui <vscale x 1 x half> poison to <vscale x 1 x i8>
+  %nv1f16_to_si32 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i32>
+  %nv1f16_to_ui32 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i32>
+  %nv1f16_to_si64 = fptosi <vscale x 1 x half> poison to <vscale x 1 x i64>
+  %nv1f16_to_ui64 = fptoui <vscale x 1 x half> poison to <vscale x 1 x i64>
 
-  %nv1f32_to_si8  = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
-  %nv1f32_to_ui8  = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
-  %nv1f32_to_si16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
-  %nv1f32_to_ui16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
-  %nv1f32_to_si64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
-  %nv1f32_to_ui64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
+  %nv1f32_to_si8  = fptosi <vscale x 1 x float> poison to <vscale x 1 x i8>
+  %nv1f32_to_ui8  = fptoui <vscale x 1 x float> poison to <vscale x 1 x i8>
+  %nv1f32_to_si16 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i16>
+  %nv1f32_to_ui16 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i16>
+  %nv1f32_to_si64 = fptosi <vscale x 1 x float> poison to <vscale x 1 x i64>
+  %nv1f32_to_ui64 = fptoui <vscale x 1 x float> poison to <vscale x 1 x i64>
 
-  %nv1f64_to_si8  = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
-  %nv1f64_to_ui8  = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
-  %nv1f64_to_si16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
-  %nv1f64_to_ui16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
-  %nv1f64_to_si32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
-  %nv1f64_to_ui32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
+  %nv1f64_to_si8  = fptosi <vscale x 1 x double> poison to <vscale x 1 x i8>
+  %nv1f64_to_ui8  = fptoui <vscale x 1 x double> poison to <vscale x 1 x i8>
+  %nv1f64_to_si16 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i16>
+  %nv1f64_to_ui16 = fptoui <vscale x 1 x double> poison to <vscale x 1 x i16>
+  %nv1f64_to_si32 = fptosi <vscale x 1 x double> poison to <vscale x 1 x i32>
+  %nv1f64_to_ui32 = fptoui <vscale x 1 x double> poison to <vscale x 1 x i32>
 
-  %nv2f16_to_si8  = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
-  %nv2f16_to_ui8  = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
-  %nv2f16_to_si32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
-  %nv2f16_to_ui32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
-  %nv2f16_to_si64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
-  %nv2f16_to_ui64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
+  %nv2f16_to_si8  = fptosi <vscale x 2 x half> poison to <vscale x 2 x i8>
+  %nv2f16_to_ui8  = fptoui <vscale x 2 x half> poison to <vscale x 2 x i8>
+  %nv2f16_to_si32 = fptosi <vscale x 2 x half> poison to <vscale x 2 x i32>
+  %nv2f16_to_ui32 = fptoui <vscale x 2 x half> poison to <vscale x 2 x i32>
+  %nv2f16_to_si64 = fptosi <vscale x 2 x half> poison to <vscale x 2 x i64>
+  %nv2f16_to_ui64 = fptoui <vscale x 2 x half> poison to <vscale x 2 x i64>
 
-  %nv2f32_to_si8  = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
-  %nv2f32_to_ui8  = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
-  %nv2f32_to_si16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
-  %nv2f32_to_ui16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
-  %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
-  %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
+  %nv2f32_to_si8  = fptosi <vscale x 2 x float> poison to <vscale x 2 x i8>
+  %nv2f32_to_ui8  = fptoui <vscale x 2 x float> poison to <vscale x 2 x i8>
+  %nv2f32_to_si16 = fptosi <vscale x 2 x float> poison to <vscale x 2 x i16>
+  %nv2f32_to_ui16 = fptoui <vscale x 2 x float> poison to <vscale x 2 x i16>
+  %nv2f32_to_si64 = fptosi <vscale x 2 x float> poison to <vscale x 2 x i64>
+  %nv2f32_to_ui64 = fptoui <vscale x 2 x float> poison to <vscale x 2 x i64>
 
-  %nv2f64_to_si8  = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
-  %nv2f64_to_ui8  = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
-  %nv2f64_to_si16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
-  %nv2f64_to_ui16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
-  %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
-  %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
+  %nv2f64_to_si8  = fptosi <vscale x 2 x double> poison to <vscale x 2 x i8>
+  %nv2f64_to_ui8  = fptoui <vscale x 2 x double> poison to <vscale x 2 x i8>
+  %nv2f64_to_si16 = fptosi <vscale x 2 x double> poison to <vscale x 2 x i16>
+  %nv2f64_to_ui16 = fptoui <vscale x 2 x double> poison to <vscale x 2 x i16>
+  %nv2f64_to_si32 = fptosi <vscale x 2 x double> poison to <vscale x 2 x i32>
+  %nv2f64_to_ui32 = fptoui <vscale x 2 x double> poison to <vscale x 2 x i32>
 
-  %nv4f16_to_si8  = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
-  %nv4f16_to_ui8  = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
-  %nv4f16_to_si32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
-  %nv4f16_to_ui32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
-  %nv4f16_to_si64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
-  %nv4f16_to_ui64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
+  %nv4f16_to_si8  = fptosi <vscale x 4 x half> poison to <vscale x 4 x i8>
+  %nv4f16_to_ui8  = fptoui <vscale x 4 x half> poison to <vscale x 4 x i8>
+  %nv4f16_to_si32 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i32>
+  %nv4f16_to_ui32 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i32>
+  %nv4f16_to_si64 = fptosi <vscale x 4 x half> poison to <vscale x 4 x i64>
+  %nv4f16_to_ui64 = fptoui <vscale x 4 x half> poison to <vscale x 4 x i64>
 
-  %nv4f32_to_si8  = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
-  %nv4f32_to_ui8  = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
-  %nv4f32_to_si16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
-  %nv4f32_to_ui16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
-  %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
-  %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
+  %nv4f32_to_si8  = fptosi <vscale x 4 x float> poison to <vscale x 4 x i8>
+  %nv4f32_to_ui8  = fptoui <vscale x 4 x float> poison to <vscale x 4 x i8>
+  %nv4f32_to_si16 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i16>
+  %nv4f32_to_ui16 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i16>
+  %nv4f32_to_si64 = fptosi <vscale x 4 x float> poison to <vscale x 4 x i64>
+  %nv4f32_to_ui64 = fptoui <vscale x 4 x float> poison to <vscale x 4 x i64>
 
-  %nv4f64_to_si8  = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
-  %nv4f64_to_ui8  = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
-  %nv4f64_to_si16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
-  %nv4f64_to_ui16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
-  %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
-  %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
+  %nv4f64_to_si8  = fptosi <vscale x 4 x double> poison to <vscale x 4 x i8>
+  %nv4f64_to_ui8  = fptoui <vscale x 4 x double> poison to <vscale x 4 x i8>
+  %nv4f64_to_si16 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i16>
+  %nv4f64_to_ui16 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i16>
+  %nv4f64_to_si32 = fptosi <vscale x 4 x double> poison to <vscale x 4 x i32>
+  %nv4f64_to_ui32 = fptoui <vscale x 4 x double> poison to <vscale x 4 x i32>
 
-  %nv8f16_to_si8  = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
-  %nv8f16_to_ui8  = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
-  %nv8f16_to_si32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
-  %nv8f16_to_ui32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
-  %nv8f16_to_si64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
-  %nv8f16_to_ui64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
+  %nv8f16_to_si8  = fptosi <vscale x 8 x half> poison to <vscale x 8 x i8>
+  %nv8f16_to_ui8  = fptoui <vscale x 8 x half> poison to <vscale x 8 x i8>
+  %nv8f16_to_si32 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i32>
+  %nv8f16_to_ui32 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i32>
+  %nv8f16_to_si64 = fptosi <vscale x 8 x half> poison to <vscale x 8 x i64>
+  %nv8f16_to_ui64 = fptoui <vscale x 8 x half> poison to <vscale x 8 x i64>
 
-  %nv8f32_to_si8  = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
-  %nv8f32_to_ui8  = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
-  %nv8f32_to_si16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
-  %nv8f32_to_ui16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
-  %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
-  %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
+  %nv8f32_to_si8  = fptosi <vscale x 8 x float> poison to <vscale x 8 x i8>
+  %nv8f32_to_ui8  = fptoui <vscale x 8 x float> poison to <vscale x 8 x i8>
+  %nv8f32_to_si16 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i16>
+  %nv8f32_to_ui16 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i16>
+  %nv8f32_to_si64 = fptosi <vscale x 8 x float> poison to <vscale x 8 x i64>
+  %nv8f32_to_ui64 = fptoui <vscale x 8 x float> poison to <vscale x 8 x i64>
 
-  %nv8f64_to_si8  = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
-  %nv8f64_to_ui8  = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
-  %nv8f64_to_si16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
-  %nv8f64_to_ui16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
-  %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
-  %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+  %nv8f64_to_si8  = fptosi <vscale x 8 x double> poison to <vscale x 8 x i8>
+  %nv8f64_to_ui8  = fptoui <vscale x 8 x double> poison to <vscale x 8 x i8>
+  %nv8f64_to_si16 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i16>
+  %nv8f64_to_ui16 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i16>
+  %nv8f64_to_si32 = fptosi <vscale x 8 x double> poison to <vscale x 8 x i32>
+  %nv8f64_to_ui32 = fptoui <vscale x 8 x double> poison to <vscale x 8 x i32>
 
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
index 73556d7e..5b30c33 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
@@ -8,67 +8,67 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define void @sve_fptruncs() {
 ; CHECK-LABEL: 'sve_fptruncs'
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x half>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x half>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x half>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x half>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x float>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x float>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
-  %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
-  %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
+  %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x half>
+  %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x half>
+  %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x half>
 
-  %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
-  %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
-  %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
+  %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x half>
+  %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x half>
+  %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x half>
 
-  %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
-  %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
-  %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
+  %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x float>
+  %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x float>
+  %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x float>
 
   ret void
 }
 
 define void @sve_fptruncs_bf16() {
 ; CHECK-SVE-LABEL: 'sve_fptruncs_bf16'
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of Invalid for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of Invalid for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
-; CHECK-SVE-NEXT:  Cost Model: Found costs of Invalid for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of Invalid for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of Invalid for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat>
+; CHECK-SVE-NEXT:  Cost Model: Found costs of Invalid for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat>
 ; CHECK-SVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE2-LABEL: 'sve_fptruncs_bf16'
-; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
-; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
-; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
-; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
-; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
-; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat>
+; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat>
+; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat>
+; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat>
+; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat>
+; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat>
 ; CHECK-SVE2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-BF16-LABEL: 'sve_fptruncs_bf16'
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
-; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat>
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat>
 ; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
-  %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
-  %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
+  %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> poison to <vscale x 2 x bfloat>
+  %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> poison to <vscale x 4 x bfloat>
+  %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> poison to <vscale x 8 x bfloat>
 
-  %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
-  %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
-  %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+  %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> poison to <vscale x 2 x bfloat>
+  %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> poison to <vscale x 4 x bfloat>
+  %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> poison to <vscale x 8 x bfloat>
 
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll b/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll
index 3e85760..2c838e2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-illegal-types.ll
@@ -1,14 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+sve  < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @load_store(ptr %ptrs) {
 ; CHECK-LABEL: 'load_store'
-; CHECK-NEXT: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, ptr undef
-; CHECK-NEXT: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, ptr undef
-; CHECK-NEXT: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, ptr undef
-; CHECK-NEXT: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, ptr undef
-; CHECK-NEXT: Invalid cost for instruction: store <vscale x 1 x i128> %load1, ptr %ptrs
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, ptr undef, align 16
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, ptr undef, align 32
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, ptr undef, align 16
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, ptr undef, align 32
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: store <vscale x 1 x i128> %load1, ptr %ptrs, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %load1 = load <vscale x 1 x i128>, ptr undef
   %load2 = load <vscale x 2 x i128>, ptr undef
   %load3 = load <vscale x 1 x fp128>, ptr undef
@@ -19,8 +22,10 @@ define void @load_store(ptr %ptrs) {
 
 define void @masked_load_store(ptr %ptrs, ptr %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
 ; CHECK-LABEL: 'masked_load_store'
-; CHECK-NEXT: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
-; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
   call void @llvm.masked.store.nxv1i128(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask)
   ret void
@@ -28,8 +33,10 @@ define void @masked_load_store(ptr %ptrs, ptr %val, <vscale x 1 x i1> %mask, <vs
 
 define void @masked_gather_scatter(<vscale x 1 x ptr> %ptrs, <vscale x 1 x ptr> %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
 ; CHECK-LABEL: 'masked_gather_scatter'
-; CHECK-NEXT: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
-; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
   call void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask)
   ret void
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
index 397b737..f7d3719 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll
@@ -5,82 +5,82 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @sve_truncs() {
 ; CHECK-LABEL: 'sve_truncs'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv4i16_to_i8 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv4i32_to_i8 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i8 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i8_to_i1 = trunc <vscale x 2 x i8> poison to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i16_to_i1 = trunc <vscale x 2 x i16> poison to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i32_to_i1 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv2i64_to_i1 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i8_to_i1 = trunc <vscale x 4 x i8> poison to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i16_to_i1 = trunc <vscale x 4 x i16> poison to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i32_to_i1 = trunc <vscale x 4 x i32> poison to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv4i64_to_i1 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i8_to_i1 = trunc <vscale x 8 x i8> poison to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i16_to_i1 = trunc <vscale x 8 x i16> poison to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i32_to_i1 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i1 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i1>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv4i16_to_i8 = trunc <vscale x 4 x i16> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv4i32_to_i8 = trunc <vscale x 4 x i32> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i8 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> poison to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %trunc_nxv8i16_to_i8 = trunc <vscale x 8 x i16> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv8i32_to_i8 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i8 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> poison to <vscale x 16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> poison to <vscale x 16 x i8>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> poison to <vscale x 16 x i8>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %trunc_nxv2i8_to_i1   = trunc <vscale x 2 x i8>  undef to <vscale x 2 x i1>
-  %trunc_nxv2i16_to_i1  = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
-  %trunc_nxv2i32_to_i1  = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
-  %trunc_nxv2i64_to_i1  = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
+  %trunc_nxv2i8_to_i1   = trunc <vscale x 2 x i8>  poison to <vscale x 2 x i1>
+  %trunc_nxv2i16_to_i1  = trunc <vscale x 2 x i16> poison to <vscale x 2 x i1>
+  %trunc_nxv2i32_to_i1  = trunc <vscale x 2 x i32> poison to <vscale x 2 x i1>
+  %trunc_nxv2i64_to_i1  = trunc <vscale x 2 x i64> poison to <vscale x 2 x i1>
 
-  %trunc_nxv4i8_to_i1   = trunc <vscale x 4 x i8>  undef to <vscale x 4 x i1>
-  %trunc_nxv4i16_to_i1  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
-  %trunc_nxv4i32_to_i1  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
-  %trunc_nxv4i64_to_i1  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
+  %trunc_nxv4i8_to_i1   = trunc <vscale x 4 x i8>  poison to <vscale x 4 x i1>
+  %trunc_nxv4i16_to_i1  = trunc <vscale x 4 x i16> poison to <vscale x 4 x i1>
+  %trunc_nxv4i32_to_i1  = trunc <vscale x 4 x i32> poison to <vscale x 4 x i1>
+  %trunc_nxv4i64_to_i1  = trunc <vscale x 4 x i64> poison to <vscale x 4 x i1>
 
-  %trunc_nxv8i8_to_i1   = trunc <vscale x 8 x i8>  undef to <vscale x 8 x i1>
-  %trunc_nxv8i16_to_i1  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
-  %trunc_nxv8i32_to_i1  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
-  %trunc_nxv8i64_to_i1  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
+  %trunc_nxv8i8_to_i1   = trunc <vscale x 8 x i8>  poison to <vscale x 8 x i1>
+  %trunc_nxv8i16_to_i1  = trunc <vscale x 8 x i16> poison to <vscale x 8 x i1>
+  %trunc_nxv8i32_to_i1  = trunc <vscale x 8 x i32> poison to <vscale x 8 x i1>
+  %trunc_nxv8i64_to_i1  = trunc <vscale x 8 x i64> poison to <vscale x 8 x i1>
 
 ; Truncates to unpacked or legal types with vscale x 2 elements
-  %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i8>
-  %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i8>
-  %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i8>
-  %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
-  %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i16>
-  %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
+  %trunc_nxv2i16_to_i8 = trunc <vscale x 2 x i16> poison to <vscale x 2 x i8>
+  %trunc_nxv2i32_to_i8 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i8>
+  %trunc_nxv2i64_to_i8 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i8>
+  %trunc_nxv2i32_to_i16 = trunc <vscale x 2 x i32> poison to <vscale x 2 x i16>
+  %trunc_nxv2i64_to_i16 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i16>
+  %trunc_nxv2i64_to_i32 = trunc <vscale x 2 x i64> poison to <vscale x 2 x i32>
 
 ; Truncates to unpacked or legal with vscale x 4 elements
-  %trunc_nxv4i16_to_i8  = trunc <vscale x 4 x i16> undef to <vscale x 4 x i8>
-  %trunc_nxv4i32_to_i8  = trunc <vscale x 4 x i32> undef to <vscale x 4 x i8>
-  %trunc_nxv4i64_to_i8  = trunc <vscale x 4 x i64> undef to <vscale x 4 x i8>
-  %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
-  %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i16>
-  %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
+  %trunc_nxv4i16_to_i8  = trunc <vscale x 4 x i16> poison to <vscale x 4 x i8>
+  %trunc_nxv4i32_to_i8  = trunc <vscale x 4 x i32> poison to <vscale x 4 x i8>
+  %trunc_nxv4i64_to_i8  = trunc <vscale x 4 x i64> poison to <vscale x 4 x i8>
+  %trunc_nxv4i32_to_i16 = trunc <vscale x 4 x i32> poison to <vscale x 4 x i16>
+  %trunc_nxv4i64_to_i16 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i16>
+  %trunc_nxv4i64_to_i32 = trunc <vscale x 4 x i64> poison to <vscale x 4 x i32>
 
 ; Truncates to unpacked or legal with vscale x 8 elements
-  %trunc_nxv8i16_to_i8  = trunc <vscale x 8 x i16> undef to <vscale x 8 x i8>
-  %trunc_nxv8i32_to_i8  = trunc <vscale x 8 x i32> undef to <vscale x 8 x i8>
-  %trunc_nxv8i64_to_i8  = trunc <vscale x 8 x i64> undef to <vscale x 8 x i8>
-  %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
-  %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i16>
+  %trunc_nxv8i16_to_i8  = trunc <vscale x 8 x i16> poison to <vscale x 8 x i8>
+  %trunc_nxv8i32_to_i8  = trunc <vscale x 8 x i32> poison to <vscale x 8 x i8>
+  %trunc_nxv8i64_to_i8  = trunc <vscale x 8 x i64> poison to <vscale x 8 x i8>
+  %trunc_nxv8i32_to_i16 = trunc <vscale x 8 x i32> poison to <vscale x 8 x i16>
+  %trunc_nxv8i64_to_i16 = trunc <vscale x 8 x i64> poison to <vscale x 8 x i16>
 
 ; Truncates to unpacked or legal with vscale x 16 elements
-  %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> undef to <vscale x 16 x i8>
-  %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> undef to <vscale x 16 x i8>
-  %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> undef to <vscale x 16 x i8>
+  %trunc_nxv16i16_to_i8 = trunc <vscale x 16 x i16> poison to <vscale x 16 x i8>
+  %trunc_nxv16i32_to_i8 = trunc <vscale x 16 x i32> poison to <vscale x 16 x i8>
+  %trunc_nxv16i64_to_i8 = trunc <vscale x 16 x i64> poison to <vscale x 16 x i8>
 
   ret void
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 7ec674a..dc4a72e 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
@@ -5,12 +6,15 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 ; TBAA should prove that these calls don't interfere, since they are
 ; IntrArgReadMem and have TBAA metadata.
 
-; CHECK:      define <8 x i16> @test0(ptr %p, ptr %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) {
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[NUW:#[0-9]+]]
-; CHECK-NEXT:   call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m)
-; CHECK-NEXT:   %c = add <8 x i16> %a, %a
 define <8 x i16> @test0(ptr %p, ptr %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) {
+; CHECK-LABEL: define <8 x i16> @test0(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], <8 x i16> [[Y:%.*]], <8 x i1> [[M:%.*]], <8 x i16> [[PT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[P]], i32 16, <8 x i1> [[M]], <8 x i16> [[PT]]) #[[ATTR2:[0-9]+]], !tbaa [[B_TBAA0:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0(<8 x i16> [[Y]], ptr [[Q]], i32 16, <8 x i1> [[M]]), !tbaa [[A_TBAA3:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add <8 x i16> [[A]], [[A]]
+; CHECK-NEXT:    ret <8 x i16> [[C]]
+;
 entry:
   %a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind, !tbaa !2
   call void @llvm.masked.store.v8i16.p0(<8 x i16> %y, ptr %q, i32 16, <8 x i1> %m), !tbaa !1
@@ -24,10 +28,16 @@ declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) nounwind
 
 ; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
 ; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
-; CHECK: attributes [[NUW]] = { nounwind }
 
 !0 = !{!"tbaa root"}
 !1 = !{!3, !3, i64 0}
 !2 = !{!4, !4, i64 0}
 !3 = !{!"A", !0}
 !4 = !{!"B", !0}
+;.
+; CHECK: [[B_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"B", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"tbaa root"}
+; CHECK: [[A_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK: [[META4]] = !{!"A", [[META2]]}
+;.
diff --git a/llvm/test/Assembler/dicompileunit-invalid-language-version.ll b/llvm/test/Assembler/dicompileunit-invalid-language-version.ll
new file mode 100644
index 0000000..b3794ac
--- /dev/null
+++ b/llvm/test/Assembler/dicompileunit-invalid-language-version.ll
@@ -0,0 +1,25 @@
+; RUN: split-file %s %t
+; RUN: not llvm-as < %t/dw_lang_with_version.ll -disable-output 2>&1 | FileCheck %s --check-prefix=WRONG-ATTR
+; RUN: not llvm-as < %t/overflow.ll -disable-output 2>&1 | FileCheck %s --check-prefix=OVERFLOW
+; RUN: not llvm-as < %t/version_without_name.ll -disable-output 2>&1 | FileCheck %s --check-prefix=NO-NAME
+; RUN: not llvm-as < %t/negative.ll -disable-output 2>&1 | FileCheck %s --check-prefix=NEGATIVE
+
+; WRONG-ATTR: error: 'sourceLanguageVersion' requires an associated 'sourceLanguageName' on !DICompileUnit
+; OVERFLOW: error: value for 'sourceLanguageVersion' too large, limit is 4294967295
+; NEGATIVE: error: expected unsigned integer
+; NO-NAME: error: missing one of 'language' or 'sourceLanguageName', required for !DICompileUnit
+
+;--- dw_lang_with_version.ll
+!0 = distinct !DICompileUnit(language: DW_LANG_C, sourceLanguageVersion: 1,
+                             file: !DIFile(filename: "", directory: ""))
+
+;--- overflow.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: 4294967298)
+
+;--- negative.ll
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_C, sourceLanguageVersion: -1,
+                             file: !DIFile(filename: "", directory: ""))
+
+;--- version_without_name.ll
+!0 = distinct !DICompileUnit(sourceLanguageVersion: 1,
+                             file: !DIFile(filename: "", directory: ""))
diff --git a/llvm/test/Bitcode/Inputs/compile-unit-no-versioned-language.bc b/llvm/test/Bitcode/Inputs/compile-unit-no-versioned-language.bc
new file mode 100644
index 0000000..461a34d0
--- /dev/null
+++ b/llvm/test/Bitcode/Inputs/compile-unit-no-versioned-language.bc
diff --git a/llvm/test/Bitcode/dwarf-source-language-version.ll b/llvm/test/Bitcode/dwarf-source-language-version.ll
new file mode 100644
index 0000000..311afd5
--- /dev/null
+++ b/llvm/test/Bitcode/dwarf-source-language-version.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s --implicit-check-not "sourceLanguageVersion: 0"
+
+; CHECK: sourceLanguageVersion: 120
+
+source_filename = "cu.cpp"
+target triple = "arm64-apple-macosx"
+
+!llvm.dbg.cu = !{!0, !5}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, sourceLanguageVersion: 120, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!1 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DICompileUnit(sourceLanguageName: DW_LNAME_ObjC_plus_plus, sourceLanguageVersion: 0, file: !6, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+!6 = !DIFile(filename: "cu2.cpp", directory: "/tmp")
diff --git a/llvm/test/Bitcode/upgrade-DICompileUnit-no-versioned-language.test b/llvm/test/Bitcode/upgrade-DICompileUnit-no-versioned-language.test
new file mode 100644
index 0000000..9475f9b
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-DICompileUnit-no-versioned-language.test
@@ -0,0 +1,21 @@
+; Test loading metadata which was not aware of versioned language names.
+;
+; RUN: llvm-dis -o - %p/Inputs/compile-unit-no-versioned-language.bc \
+; RUN:     | FileCheck %s --implicit-check-not "sourceLanguageName" --implicit-check-not "sourceLanguageVersion"
+
+; Input bitcode file was compiled from following source on
+; LLVM commit `fc22b58c25963ece6b041cadbdc931c2338955e4`:
+;
+; source_filename = "cu.cpp"
+; target triple = "arm64-apple-macosx"
+; 
+; !llvm.dbg.cu = !{!0}
+; !llvm.module.flags = !{!3, !4}
+; 
+; !0 = distinct !DICompileUnit(language: DW_LANG_ObjC, file: !1, producer: "handwritten", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/")
+; !1 = !DIFile(filename: "cu.cpp", directory: "/tmp")
+; !2 = !{}
+; !3 = !{i32 7, !"Dwarf Version", i32 5}
+; !4 = !{i32 2, !"Debug Info Version", i32 3}
+
+; CHECK: distinct !DICompileUnit(language: DW_LANG_ObjC,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir
index cc75774..c2bf95c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-compress.mir
@@ -15,8 +15,9 @@ body:             |
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[C1]](s64)
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C1]], [[C2]]
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY2]], [[C2]]
     ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64)
     ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[PTR_ADD]](p0) :: (store (s32))
     ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s16>), [[C1]](s64)
@@ -91,7 +92,8 @@ body:             |
     ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32))
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64)
-    ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[C3]], [[C2]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C3]](s64)
+    ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[COPY3]], [[C2]]
     ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL1]](s64)
     ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[PTR_ADD1]](p0) :: (store (s32))
     ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s16>), [[C3]](s64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
index 649d0a9..e7e9ee7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matmul.ll
@@ -1,41 +1,54 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s -o -| FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm    < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s
 
 define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: smmla.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    smmla v0.4s, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: smmla.v4i32.v16i8
-; CHECK: smmla   v0.4s, v1.16b, v2.16b
   %vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
   ret <4 x i32> %vmmla1.i
 }
 
 define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: ummla.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ummla v0.4s, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: ummla.v4i32.v16i8
-; CHECK: ummla   v0.4s, v1.16b, v2.16b
   %vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
   ret <4 x i32> %vmmla1.i
 }
 
 define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: usmmla.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usmmla v0.4s, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usmmla.v4i32.v16i8
-; CHECK: usmmla   v0.4s, v1.16b, v2.16b
   %vusmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
   ret <4 x i32> %vusmmla1.i
 }
 
 define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: usdot.v2i32.v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot v0.2s, v1.8b, v2.8b
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usdot.v2i32.v8i8
-; CHECK: usdot   v0.2s, v1.8b, v2.8b
   %vusdot1.i = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b)
   ret <2 x i32> %vusdot1.i
 }
 
 define <2 x i32> @usdot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: usdot_lane.v2i32.v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    usdot v0.2s, v1.8b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usdot_lane.v2i32.v8i8
-; CHECK: usdot   v0.2s, v1.8b, v2.4b[0]
   %0 = bitcast <8 x i8> %b to <2 x i32>
   %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
   %1 = bitcast <2 x i32> %shuffle to <8 x i8>
@@ -44,9 +57,12 @@ entry:
 }
 
 define <2 x i32> @sudot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: sudot_lane.v2i32.v8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    sudot v0.2s, v1.8b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: sudot_lane.v2i32.v8i8
-; CHECK: sudot   v0.2s, v1.8b, v2.4b[0]
   %0 = bitcast <8 x i8> %b to <2 x i32>
   %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
   %1 = bitcast <2 x i32> %shuffle to <8 x i8>
@@ -55,9 +71,11 @@ entry:
 }
 
 define <2 x i32> @usdot_lane.v2i32.v16i8(<2 x i32> %r, <8 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: usdot_lane.v2i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot v0.2s, v1.8b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usdot_lane.v2i32.v16i8
-; CHECK: usdot   v0.2s, v1.8b, v2.4b[0]
   %0 = bitcast <16 x i8> %b to <4 x i32>
   %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> zeroinitializer
   %1 = bitcast <2 x i32> %shuffle to <8 x i8>
@@ -66,9 +84,11 @@ entry:
 }
 
 define <2 x i32> @sudot_lane.v2i32.v16i8(<2 x i32> %r, <8 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: sudot_lane.v2i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sudot v0.2s, v1.8b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: sudot_lane.v2i32.v16i8
-; CHECK: sudot   v0.2s, v1.8b, v2.4b[0]
   %0 = bitcast <16 x i8> %b to <4 x i32>
   %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> zeroinitializer
   %1 = bitcast <2 x i32> %shuffle to <8 x i8>
@@ -77,17 +97,22 @@ entry:
 }
 
 define <4 x i32> @usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: usdot.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot v0.4s, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usdot.v4i32.v16i8
-; CHECK: usdot   v0.4s, v1.16b, v2.16b
   %vusdot1.i = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
   ret <4 x i32> %vusdot1.i
 }
 
 define <4 x i32> @usdot_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: usdot_lane.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    usdot v0.4s, v1.16b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usdot_lane.v4i32.v16i8
-; CHECK: usdot   v0.4s, v1.16b, v2.4b[0]
   %0 = bitcast <8 x i8> %b to <2 x i32>
   %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
   %1 = bitcast <4 x i32> %shuffle to <16 x i8>
@@ -96,9 +121,12 @@ entry:
 }
 
 define <4 x i32> @sudot_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: sudot_lane.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    sudot v0.4s, v1.16b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: sudot_lane.v4i32.v16i8
-; CHECK: sudot   v0.4s, v1.16b, v2.4b[0]
   %0 = bitcast <8 x i8> %b to <2 x i32>
   %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
   %1 = bitcast <4 x i32> %shuffle to <16 x i8>
@@ -107,9 +135,11 @@ entry:
 }
 
 define <4 x i32> @usdot_laneq.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: usdot_laneq.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    usdot v0.4s, v1.16b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: usdot_laneq.v4i32.v16i8
-; CHECK: usdot   v0.4s, v1.16b, v2.4b[0]
   %0 = bitcast <16 x i8> %b to <4 x i32>
   %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
   %1 = bitcast <4 x i32> %shuffle to <16 x i8>
@@ -118,9 +148,11 @@ entry:
 }
 
 define <4 x i32> @sudot_laneq.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: sudot_laneq.v4i32.v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sudot v0.4s, v1.16b, v2.4b[0]
+; CHECK-NEXT:    ret
 entry:
-; CHECK-LABEL: sudot_laneq.v4i32.v16i8
-; CHECK: sudot   v0.4s, v1.16b, v2.4b[0]
   %0 = bitcast <16 x i8> %b to <4 x i32>
   %shuffle = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
   %1 = bitcast <4 x i32> %shuffle to <16 x i8>
@@ -133,4 +165,3 @@ declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16
 declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
 declare <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
 declare <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
-
diff --git a/llvm/test/CodeGen/AArch64/aarch64-post-coalescer.mir b/llvm/test/CodeGen/AArch64/aarch64-post-coalescer.mir
new file mode 100644
index 0000000..6540160
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-post-coalescer.mir
@@ -0,0 +1,16 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -mattr=+sme -run-pass=aarch64-post-coalescer-pass -o - %s | FileCheck %s
+
+---
+name:            foo
+machineFunctionInfo:
+  hasStreamingModeChanges: true
+body:             |
+  bb.0.entry:
+  ; CHECK-LABEL: name: foo
+  ; CHECK: $d0 = COPY undef %0:fpr64
+  ; CHECK-NEXT: FAKE_USE implicit $d0
+  %1:fpr64 = COALESCER_BARRIER_FPR64 undef %1
+  $d0 = COPY %1
+  FAKE_USE implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir b/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir
new file mode 100644
index 0000000..3f174a6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir
@@ -0,0 +1,227 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme -run-pass=aarch64-machine-sme-abi -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  ; Test moving a state change to be before a $nzcv def
+  define void @move_before_nzcv_def() "aarch64_inout_za" { ret void }
+
+  ; Test moving a state change to a point where $x0 is live
+  define void @move_to_x0_live() "aarch64_inout_za" { ret void }
+
+  ; Test we don't move before a previous state change.
+  define void @do_not_move_before_prior_state_change() "aarch64_za_state_agnostic" { ret void }
+
+  ; Test we don't move into a call sequence.
+  define void @do_not_move_into_call() "aarch64_inout_za" { ret void }
+
+  declare void @clobber()
+  declare void @inout_call() "aarch64_inout_za"
+...
+---
+name:            move_before_nzcv_def
+tracksRegLiveness: true
+isSSA:             true
+noVRegs:           false
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: move_before_nzcv_def
+    ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp
+    ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]]
+    ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]]
+    ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]]
+    ; CHECK-NEXT: MSR 56965, [[COPY1]]
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    ; CHECK-NEXT: RequiresZASavePseudo
+    ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
+    ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
+    ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
+    ; CHECK-NEXT: MSR 56965, $xzr
+    ; CHECK-NEXT: $nzcv = IMPLICIT_DEF
+    ; CHECK-NEXT: $zab0 = IMPLICIT_DEF
+    ; CHECK-NEXT: FAKE_USE $nzcv
+    ; CHECK-NEXT: RET_ReallyLR
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    RequiresZASavePseudo
+    BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+    $nzcv = IMPLICIT_DEF
+    $zab0 = IMPLICIT_DEF
+    FAKE_USE $nzcv
+
+    RET_ReallyLR
+...
+---
+name:            move_to_x0_live
+tracksRegLiveness: true
+isSSA:             true
+noVRegs:           false
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: move_to_x0_live
+    ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp
+    ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]]
+    ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]]
+    ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]]
+    ; CHECK-NEXT: MSR 56965, [[COPY1]]
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    ; CHECK-NEXT: RequiresZASavePseudo
+    ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    ; CHECK-NEXT: $x0 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
+    ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
+    ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
+    ; CHECK-NEXT: MSR 56965, $xzr
+    ; CHECK-NEXT: $x0 = COPY [[COPY2]]
+    ; CHECK-NEXT: $nzcv = IMPLICIT_DEF
+    ; CHECK-NEXT: FAKE_USE $x0
+    ; CHECK-NEXT: $zab0 = IMPLICIT_DEF
+    ; CHECK-NEXT: FAKE_USE $nzcv
+    ; CHECK-NEXT: RET_ReallyLR
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    RequiresZASavePseudo
+    BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+    $x0 = IMPLICIT_DEF
+
+    $nzcv = IMPLICIT_DEF
+    FAKE_USE $x0
+
+    $zab0 = IMPLICIT_DEF
+    FAKE_USE $nzcv
+
+    RET_ReallyLR
+...
+---
+name:            do_not_move_before_prior_state_change
+tracksRegLiveness: true
+isSSA:             true
+noVRegs:           false
+body:             |
+  ; CHECK-LABEL: name: do_not_move_before_prior_state_change
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BL &__arm_sme_state_size, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit-def $x0
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   $sp = SUBXrx64 $sp, [[COPY]], 24
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64 = COPY $sp
+  ; CHECK-NEXT:   $nzcv = IMPLICIT_DEF
+  ; CHECK-NEXT:   $zab0 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv
+  ; CHECK-NEXT:   $x0 = COPY [[COPY1]]
+  ; CHECK-NEXT:   BL &__arm_sme_save, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0
+  ; CHECK-NEXT:   MSR 55824, [[MRS]], implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 2, %bb.1, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $nzcv
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   FAKE_USE $nzcv
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   RequiresZASavePseudo
+  ; CHECK-NEXT:   BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $x0 = COPY [[COPY1]]
+  ; CHECK-NEXT:   BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   RequiresZASavePseudo
+  ; CHECK-NEXT:   BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $x0 = COPY [[COPY1]]
+  ; CHECK-NEXT:   BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0:
+    successors: %bb.1, %bb.2
+
+    ; The insertion point can move before the $nzcv def (as that would require
+    ; moving before a $zab0 def -- that requires the ACTIVE state).
+    $nzcv = IMPLICIT_DEF
+    $zab0 = IMPLICIT_DEF
+    Bcc 2, %bb.1, implicit $nzcv
+    B %bb.2
+  ; bb.1 and bb.2 both require ZA saved on entry (to force bb.0's exit bundle to
+  ; pick the LOCAL_SAVED state).
+  bb.1:
+    liveins: $nzcv
+    FAKE_USE $nzcv
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    RequiresZASavePseudo
+    BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+    RET_ReallyLR
+  bb.2:
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    RequiresZASavePseudo
+    BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+    RET_ReallyLR
+...
+---
+name:            do_not_move_into_call
+tracksRegLiveness: true
+isSSA:             true
+noVRegs:           false
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: do_not_move_into_call
+    ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp
+    ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]]
+    ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]]
+    ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]]
+    ; CHECK-NEXT: MSR 56965, [[COPY1]]
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    ; CHECK-NEXT: RequiresZASavePseudo
+    ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ; CHECK-NEXT: $nzcv = IMPLICIT_DEF
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
+    ; CHECK-NEXT: [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
+    ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
+    ; CHECK-NEXT: RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
+    ; CHECK-NEXT: MSR 56965, $xzr
+    ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv
+    ; CHECK-NEXT: $zab0 = IMPLICIT_DEF
+    ; CHECK-NEXT: FAKE_USE $nzcv
+    ; CHECK-NEXT: RET_ReallyLR
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    RequiresZASavePseudo
+    BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+
+    ; This is artificial test where NZCV is def'd inside a call, so we can't
+    ; move the insert point before it's definition.
+    $nzcv = IMPLICIT_DEF
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+
+    $zab0 = IMPLICIT_DEF
+    FAKE_USE $nzcv
+
+    RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/mir-yaml-has-streaming-mode-changes.ll b/llvm/test/CodeGen/AArch64/mir-yaml-has-streaming-mode-changes.ll
new file mode 100644
index 0000000..8f1fe5c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mir-yaml-has-streaming-mode-changes.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=aarch64 -mattr=+sme -stop-after=aarch64-isel < %s | FileCheck %s
+
+target triple = "aarch64"
+
+declare void @foo() "aarch64_pstate_sm_enabled"
+
+define dso_local void @bar() local_unnamed_addr {
+; CHECK-LABEL: name: bar
+; CHECK: hasStreamingModeChanges: true
+entry:
+  tail call void @foo() "aarch64_pstate_sm_enabled"
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index e3007a3..e4f9efa 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -391,11 +391,9 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
 ; CHECK-NEWLOWERING-NEXT:    sub x19, x8, x0
 ; CHECK-NEWLOWERING-NEXT:  .LBB7_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16, lsl #12 // =65536
-; CHECK-NEWLOWERING-NEXT:    cmp sp, x19
 ; CHECK-NEWLOWERING-NEXT:    mov x0, x19
-; CHECK-NEWLOWERING-NEXT:    mrs x8, NZCV
 ; CHECK-NEWLOWERING-NEXT:    bl __arm_sme_save
-; CHECK-NEWLOWERING-NEXT:    msr NZCV, x8
+; CHECK-NEWLOWERING-NEXT:    cmp sp, x19
 ; CHECK-NEWLOWERING-NEXT:    b.le .LBB7_3
 ; CHECK-NEWLOWERING-NEXT:  // %bb.2: // in Loop: Header=BB7_1 Depth=1
 ; CHECK-NEWLOWERING-NEXT:    mov x0, x19
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
index 18764d5..9f33c06 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir
@@ -62,14 +62,12 @@ body:             |
   ; CHECK-NEXT:   RequiresZASavePseudo
   ; CHECK-NEXT:   BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-NEXT:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 101, 0, implicit-def $nzcv
-  ; CHECK-NEXT:   [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv
   ; CHECK-NEXT:   MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
   ; CHECK-NEXT:   [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
   ; CHECK-NEXT:   $x0 = ADDXri %stack.0, 0, 0
   ; CHECK-NEXT:   RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
   ; CHECK-NEXT:   MSR 56965, $xzr
-  ; CHECK-NEXT:   MSR 55824, [[MRS]], implicit-def $nzcv
+  ; CHECK-NEXT:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 101, 0, implicit-def $nzcv
   ; CHECK-NEXT:   Bcc 11, %bb.2, implicit $nzcv
   ; CHECK-NEXT:   B %bb.1
   ; CHECK-NEXT: {{  $}}
@@ -116,16 +114,14 @@ body:             |
 # CHECK-ASM-LABEL: cmp_branch
 #       CHECK-ASM:   msr TPIDR2_EL0, x10
 #  CHECK-ASM-NEXT:   bl clobber
-#  CHECK-ASM-NEXT:   cmp w20, #101
-#  CHECK-ASM-NEXT:   mrs x8, NZCV
 #  CHECK-ASM-NEXT:   smstart za
-#  CHECK-ASM-NEXT:   mrs x9, TPIDR2_EL0
+#  CHECK-ASM-NEXT:   mrs x8, TPIDR2_EL0
 #  CHECK-ASM-NEXT:   sub x0, x29, #16
-#  CHECK-ASM-NEXT:   cbnz x9, .LBB0_2
+#  CHECK-ASM-NEXT:   cbnz x8, .LBB0_2
 #       CHECK-ASM:   bl __arm_tpidr2_restore
 #  CHECK-ASM-NEXT: .LBB0_2:
+#  CHECK-ASM-NEXT:   cmp w20, #101
 #  CHECK-ASM-NEXT:   msr TPIDR2_EL0, xzr
-#  CHECK-ASM-NEXT:   msr NZCV, x8
 #  CHECK-ASM-NEXT:   b.lt .LBB0_4
 #       CHECK-ASM:   bl inout_call
 #  CHECK-ASM-NEXT: .LBB0_4:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index 26b9d99..8705647 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -206,7 +206,7 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
 
 ; global nnan function attribute always forces clamp combine
 
-define float @test_min_max_global_nnan(float %a) #3 {
+define float @test_min_max_global_nnan(float %a) {
 ; GFX10-LABEL: test_min_max_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -223,11 +223,11 @@ define float @test_min_max_global_nnan(float %a) #3 {
 ; GFX12-NEXT:    v_max_num_f32_e64 v0, v0, v0 clamp
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
-  %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
+  %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
   ret float %fmed
 }
 
-define float @test_max_min_global_nnan(float %a) #3 {
+define float @test_max_min_global_nnan(float %a) {
 ; GFX10-LABEL: test_max_min_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -244,7 +244,7 @@ define float @test_max_min_global_nnan(float %a) #3 {
 ; GFX12-NEXT:    v_max_num_f32_e64 v0, v0, v0 clamp
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
   %minnum = call float @llvm.minnum.f32(float %a, float 1.0)
-  %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
+  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
   ret float %fmed
 }
 
@@ -414,5 +414,4 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
 attributes #0 = {"amdgpu-ieee"="true"}
 attributes #1 = {"amdgpu-ieee"="false"}
 attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
-attributes #3 = {"no-nans-fp-math"="true"}
 attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index d2c93e7..696a87b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -232,7 +232,7 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
 
 ; global nnan function attribute always forces fmed3 combine
 
-define float @test_min_max_global_nnan(float %a) #2 {
+define float @test_min_max_global_nnan(float %a) {
 ; GFX10-LABEL: test_min_max_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -254,12 +254,12 @@ define float @test_min_max_global_nnan(float %a) #2 {
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
-  %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0)
+  %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
   %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0)
   ret float %fmed
 }
 
-define float @test_max_min_global_nnan(float %a) #2 {
+define float @test_max_min_global_nnan(float %a) {
 ; GFX10-LABEL: test_max_min_global_nnan:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,8 +281,8 @@ define float @test_max_min_global_nnan(float %a) #2 {
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
-  %minnum = call float @llvm.minnum.f32(float %a, float 4.0)
-  %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
+  %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0)
+  %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0)
   ret float %fmed
 }
 
@@ -560,4 +560,3 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
 attributes #0 = {"amdgpu-ieee"="true"}
 attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 9e15225..3145a27 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -10,7 +10,7 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
 
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %o
   ret void
 }
 
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -231,7 +231,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, pt
   ret void
 }
 
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -342,7 +342,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -453,7 +453,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -569,7 +569,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrsp
   ret void
 }
 
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -740,7 +740,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1
   ret void
 }
 
-define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -955,14 +955,14 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
   %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %a = load float, ptr addrspace(1) %gep0
 
-  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
-  %med = call float @llvm.minnum.f32(float %max, float 4.0)
+  %max = call nnan float @llvm.maxnum.f32(float %a, float 2.0)
+  %med = call nnan float @llvm.minnum.f32(float %max, float 4.0)
 
   store float %med, ptr addrspace(1) %outgep
   ret void
 }
 
-define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -1297,10 +1297,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
   %a.fneg = fsub float -0.0, %a
-  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -1487,10 +1487,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
   %b.fneg = fsub float -0.0, %b
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b.fneg)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b.fneg)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -1677,10 +1677,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
   %c.fneg = fsub float -0.0, %c
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fneg)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -1872,14 +1872,14 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
   %c = load volatile float, ptr addrspace(1) %gep2
 
   %a.fneg = fsub float -0.0, %a
-  %b.fabs = call float @llvm.fabs.f32(float %b)
-  %c.fabs = call float @llvm.fabs.f32(float %c)
+  %b.fabs = call nnan float @llvm.fabs.f32(float %b)
+  %c.fabs = call nnan float @llvm.fabs.f32(float %c)
   %c.fabs.fneg = fsub float -0.0, %c.fabs
 
-  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
-  %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
 
   store float %med3, ptr addrspace(1) %outgep
   ret void
@@ -2082,16 +2082,16 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
   %c.fabs = call float @llvm.fabs.f32(float %c)
   %c.fabs.fneg = fsub float -0.0, %c.fabs
 
-  %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
-  %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
 
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2266,7 +2266,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, pt
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2418,7 +2418,7 @@ define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %ou
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2570,7 +2570,7 @@ define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr
   ret void
 }
 
-define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -2878,10 +2878,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3030,10 +3030,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3220,10 +3220,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
   %a.fneg = fsub float -0.0, %a
-  %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
-  %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b)
-  %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.maxnum.f32(float %a.fneg, float %b)
+  %tmp1 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+  %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3372,10 +3372,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3524,10 +3524,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3676,10 +3676,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3828,10 +3828,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -3980,10 +3980,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -4132,10 +4132,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -4284,10 +4284,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -4436,10 +4436,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %o
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -4588,10 +4588,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -4740,10 +4740,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -4892,10 +4892,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -5044,10 +5044,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -5196,10 +5196,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -5348,10 +5348,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
-  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
-  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
-  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %b, float %a)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %b, float %a)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %c, float %tmp1)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp2, float %tmp0)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -5503,10 +5503,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %tmp0 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp1 = call float @llvm.minnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp1 = call nnan float @llvm.minnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.maxnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.minnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -5515,7 +5515,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
 ; Negative patterns
 ; ---------------------------------------------------------------------
 
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -5717,7 +5717,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -5944,7 +5944,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6146,7 +6146,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6352,7 +6352,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6527,7 +6527,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6702,7 +6702,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -6877,7 +6877,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1)
   ret void
 }
 
-define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -7270,10 +7270,10 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
   %a.fneg = fsub float -0.0, %a
-  %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
-  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
-  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c)
+  %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %med3, ptr addrspace(1) %outgep
   ret void
 }
@@ -7428,13 +7428,13 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out
   %a = load volatile float, ptr addrspace(1) %gep0
   %b = load volatile float, ptr addrspace(1) %gep1
   %c = load volatile float, ptr addrspace(1) %gep2
-  %max = call float @llvm.maxnum.f32(float %a, float %b)
-  %minmax = call float @llvm.minnum.f32(float %max, float %c)
+  %max = call nnan float @llvm.maxnum.f32(float %a, float %b)
+  %minmax = call nnan float @llvm.minnum.f32(float %max, float %c)
   store float %minmax, ptr addrspace(1) %outgep
   ret void
 }
 
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -7597,7 +7597,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o
   ret void
 }
 
-define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) {
 ; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
@@ -7865,7 +7865,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt
   ret void
 }
 
-define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: two_non_inline_constant:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -7998,7 +7998,7 @@ define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr ad
 }
 
 ; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants.
-define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: one_non_inline_constant:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -8137,7 +8137,7 @@ define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr ad
   ret void
 }
 
-define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
 ; SI-SDAG-LABEL: two_non_inline_constant_multi_use:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
@@ -8343,7 +8343,7 @@ define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %o
   ret void
 }
 
-define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 {
+define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) {
 ; SI-LABEL: v_test_fmed3_r_i_i_f32_minimumnum_maximumnum:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8384,7 +8384,7 @@ define float @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 {
   ret float %med
 }
 
-define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %a) #1 {
+define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %a) {
 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8452,7 +8452,7 @@ define <2 x float> @v_test_fmed3_r_i_i_v2f32_minimumnum_maximumnum(<2 x float> %
   ret <2 x float> %med
 }
 
-define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(float %a) #1 {
+define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(float %a) {
 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8525,7 +8525,7 @@ define { float, float } @v_test_fmed3_r_i_i_f32_minimumnum_maximumnum_multi_use(
   ret { float, float } %ins.1
 }
 
-define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a) {
 ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8567,7 +8567,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_minimumnum_maximumnum(float %a)
   ret float %med
 }
 
-define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a) {
 ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8609,7 +8609,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minimumnum(float %a)
   ret float %med
 }
 
-define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) {
 ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8651,7 +8651,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maxnum_minimumnum(float %a) #1 {
   ret float %med
 }
 
-define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) #1 {
+define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) {
 ; SI-LABEL: v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8693,7 +8693,7 @@ define float @v_test_nnan_input_fmed3_r_i_i_f32_maximumnum_minnum(float %a) #1 {
   ret float %med
 }
 
-define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 {
+define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) {
 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f16_minimumnum_maximumnum:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8772,7 +8772,7 @@ define half @v_test_fmed3_r_i_i_f16_minimumnum_maximumnum(half %a) #1 {
   ret half %med
 }
 
-define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) #1 {
+define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a) {
 ; SI-SDAG-LABEL: v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum:
 ; SI-SDAG:       ; %bb.0:
 ; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8848,7 +8848,7 @@ define <2 x half> @v_test_fmed3_r_i_i_v2f16_minimumnum_maximumnum(<2 x half> %a)
   ret <2 x half> %med
 }
 
-define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) #1 {
+define double @v_test_fmed3_r_i_i_f64_minimumnum_maximumnum(double %a) {
 ; SI-LABEL: v_test_fmed3_r_i_i_f64_minimumnum_maximumnum:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8905,5 +8905,4 @@ declare half @llvm.minnum.f16(half, half) #0
 declare half @llvm.maxnum.f16(half, half) #0
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
 attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 56f9c5d..d578d2e 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -612,10 +612,10 @@ define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z)
 ; GFX1250-NEXT:    v_med3_num_f32 v2, v2, v3, v4
 ; GFX1250-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
-  %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
-  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
-  %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minnum.f32(float %x, float %y)
+  %tmp1 = call nnan float @llvm.maxnum.f32(float %x, float %y)
+  %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %z)
+  %tmp3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2)
   store float %tmp3, ptr addrspace(1) %arg
   ret void
 }
@@ -646,10 +646,10 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
 ; GFX1250-NEXT:    v_med3_num_f32 v2, v2, v3, v4
 ; GFX1250-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
-  %tmp0 = call float @llvm.minimumnum.f32(float %x, float %y)
-  %tmp1 = call float @llvm.maximumnum.f32(float %x, float %y)
-  %tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z)
-  %tmp3 = call float @llvm.maximumnum.f32(float %tmp0, float %tmp2)
+  %tmp0 = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+  %tmp1 = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  %tmp2 = call nnan float @llvm.minimumnum.f32(float %tmp1, float %z)
+  %tmp3 = call nnan float @llvm.maximumnum.f32(float %tmp0, float %tmp2)
   store float %tmp3, ptr addrspace(1) %arg
   ret void
 }
@@ -1280,10 +1280,10 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
 ; GISEL-GFX1250-FAKE16-NEXT:    v_med3_num_f16 v2, v2, v3, v4
 ; GISEL-GFX1250-FAKE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GISEL-GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
-  %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
-  %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
-  %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
-  %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
+  %tmp0 = call nnan half @llvm.minnum.f16(half %x, half %y)
+  %tmp1 = call nnan half @llvm.maxnum.f16(half %x, half %y)
+  %tmp2 = call nnan half @llvm.minnum.f16(half %tmp1, half %z)
+  %tmp3 = call nnan half @llvm.maxnum.f16(half %tmp0, half %tmp2)
   store half %tmp3, ptr addrspace(1) %arg
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll
index a2d6ca9..972a470 100644
--- a/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll
+++ b/llvm/test/CodeGen/ARM/call-graph-section-addrtaken.ll
@@ -27,7 +27,7 @@ entry:
 !1 = !{i64 0, !"_ZTSFivE.generalized"}
 !2 = !{i64 0, !"_ZTSFviE.generalized"}
 
-; CHECK: .section .callgraph,"o",%progbits,.text
+; CHECK: .section .llvm.callgraph,"o",%progbits,.text
 ;; Version
 ; CHECK-NEXT: .byte   0
 ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0.
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll
index bf5249e..ec8d5b8 100644
--- a/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll
+++ b/llvm/test/CodeGen/ARM/call-graph-section-assembly.ll
@@ -1,8 +1,8 @@
 ;; Test if temporary labels are generated for each indirect callsite.
-;; Test if the .callgraph section contains the MD5 hash of callees' type (type id)
+;; Test if the .llvm.callgraph section contains the MD5 hash of callees' type (type id)
 ;; is correctly paired with its corresponding temporary label generated for indirect
 ;; call sites annotated with !callee_type metadata.
-;; Test if the .callgraph section contains unique direct callees.
+;; Test if the .llvm.callgraph section contains unique direct callees.
 
 ; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -o - < %s | FileCheck %s
 
@@ -36,7 +36,7 @@ entry:
 !4 = !{!5}
 !5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
 
-; CHECK: .section .callgraph,"o",%progbits,.text
+; CHECK: .section .llvm.callgraph,"o",%progbits,.text
 ;; Version
 ; CHECK-NEXT: .byte   0
 ;; Flags
diff --git a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll
index d577603..8036004 100644
--- a/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll
+++ b/llvm/test/CodeGen/ARM/call-graph-section-tailcall.ll
@@ -1,7 +1,7 @@
-;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls.
+;; Tests that we store the type identifiers in .llvm.callgraph section of the object file for tailcalls.
 
 ; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s
 
 define i32 @check_tailcall(ptr %func, i8 %x) !type !0 {
 entry:
@@ -27,7 +27,7 @@ declare !type !2 i32 @bar(i8 signext)
 !2 = !{i64 0, !"_ZTSFicE.generalized"}
 !3 = !{i64 0, !"_ZTSFiiE.generalized"}
 
-; CHECK:      Hex dump of section '.callgraph':
+; CHECK:      Hex dump of section '.llvm.callgraph':
 ; CHECK-NEXT: 0x00000000 00050000 00008e19 0b7f3326 e3000154
 ; CHECK-NEXT: 0x00000010 86bc5981 4b8e3000 05100000 00a150b8
 ;; Verify that the type id 0x308e4b8159bc8654 is in section.
diff --git a/llvm/test/CodeGen/ARM/call-graph-section.ll b/llvm/test/CodeGen/ARM/call-graph-section.ll
index 928a1067..167cc6f 100644
--- a/llvm/test/CodeGen/ARM/call-graph-section.ll
+++ b/llvm/test/CodeGen/ARM/call-graph-section.ll
@@ -1,7 +1,7 @@
-;; Tests that we store the type identifiers in .callgraph section of the object file.
+;; Tests that we store the type identifiers in .llvm.callgraph section of the object file.
 
 ; RUN: llc -mtriple=arm-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s
 
 declare !type !0 void @foo()
 
@@ -31,7 +31,7 @@ entry:
 
 ;; Make sure following type IDs are in call graph section
 ;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814
-; CHECK: Hex dump of section '.callgraph':
+; CHECK: Hex dump of section '.llvm.callgraph':
 ; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000324
 ; CHECK-NEXT: 0x00000010 44f731f5 eecb3e54 86bc5981 4b8e307a
 ; CHECK-NEXT: 0x00000020 de6814f8 97fd77
diff --git a/llvm/test/CodeGen/ARM/nnan-fsub.ll b/llvm/test/CodeGen/ARM/nnan-fsub.ll
index 0183908..78dd36f 100644
--- a/llvm/test/CodeGen/ARM/nnan-fsub.ll
+++ b/llvm/test/CodeGen/ARM/nnan-fsub.ll
@@ -1,18 +1,22 @@
-; RUN: llc -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s
-; RUN: llc -mcpu=cortex-a9 --enable-no-nans-fp-math < %s | FileCheck -check-prefix=FAST %s
+; RUN: llc -mcpu=cortex-a9 < %s | FileCheck %s
 
 target triple = "armv7-apple-ios"
 
-; SAFE: test
-; FAST: test
+; CHECK-LABEL: test
 define float @test(float %x, float %y) {
 entry:
-; SAFE: vmul.f32
-; SAFE: vsub.f32
-; FAST: mov r0, #0
+; CHECK: vmul.f32
+; CHECK-NEXT: vsub.f32
   %0 = fmul float %x, %y
   %1 = fsub float %0, %0
   ret float %1
 }
 
-
+; CHECK-LABEL: test_nnan
+define float @test_nnan(float %x, float %y) {
+entry:
+; CHECK: mov r0, #0
+  %0 = fmul float %x, %y
+  %1 = fsub nnan float %0, %0
+  ret float %1
+}
diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
index a78fdd5..f1486f97 100644
--- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
+++ b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll
@@ -74,7 +74,7 @@ entry:
 ; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
 ; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
 ; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
+; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
 ; CHECK:    [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
 ; CHECK:    [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
 ; CHECK:    [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
@@ -83,9 +83,9 @@ entry:
 ; CHECK:    [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
 ; CHECK:    [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
 ; CHECK:    [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
-; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 32
-; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
-  call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 64, i1 false)
+; CHECK:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24
+; CHECK:    store <3 x double> [[UPTO2]], ptr [[DEST]], align 8
+  call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false)
 
 ; CHECK:    [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
 ; CHECK:    [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
diff --git a/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll b/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll
index 7ba2ed2..f1d28e2 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/cbuffer_metadata.ll
@@ -19,11 +19,11 @@ target triple = "dxil-pc-shadermodel6.6-compute"
 
 ; PRINT:; Resource Bindings:
 ; PRINT-NEXT:;
-; PRINT-NEXT:; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; PRINT-NEXT:; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; PRINT-NEXT:; CB1                               cbuffer      NA          NA     CB0            cb0     1
-; PRINT-NEXT:; CB2                               cbuffer      NA          NA     CB1            cb1     1
-; PRINT-NEXT:; MyConstants                       cbuffer      NA          NA     CB2    cb5,space15     1
+; PRINT-NEXT:; Name            Type  Format  Dim   ID    HLSL Bind  Count
+; PRINT-NEXT:; ----
+; PRINT-NEXT:; CB1          cbuffer      NA   NA  CB0          cb0     1
+; PRINT-NEXT:; CB2          cbuffer      NA   NA  CB1          cb1     1
+; PRINT-NEXT:; MyConstants  cbuffer      NA   NA  CB2  cb5,space15     1
 
 define void @test() #0 {
 
diff --git a/llvm/test/CodeGen/DirectX/bufferGetDimensions.ll b/llvm/test/CodeGen/DirectX/bufferGetDimensions.ll
new file mode 100644
index 0000000..ff03bf1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/bufferGetDimensions.ll
@@ -0,0 +1,16 @@
+; RUN: opt -S -dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+define i32 @test_getdimensions_no_mips() {
+  ; CHECK: %[[HANDLE:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, 
+  ; CHECK-NEXT: %[[ANNOT_HANDLE:.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[HANDLE]]
+  %handle = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+
+  ; CHECK-NEXT: %[[RETVAL:.*]] = call %dx.types.Dimensions @dx.op.getDimensions(i32 72, %dx.types.Handle %[[ANNOT_HANDLE]], i32 undef)
+  ; CHECK-NEXT: %[[DIM:.*]] = extractvalue %dx.types.Dimensions %[[RETVAL]], 0
+  %1 = call i32 @llvm.dx.resource.getdimensions.x(target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %handle)
+  
+  ; CHECK-NEXT: ret i32 %[[DIM]]
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index 2e500d5..da7546e 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -689,8 +689,8 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_INSERT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_EXTRACT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv32.mir
new file mode 100644
index 0000000..d7c0e80
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv32.mir
@@ -0,0 +1,1742 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name:            insertelement_nxv1i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv1i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv1i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv1i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s32) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s32)
+    %1:_(s32) = COPY $x11
+    %4:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %3:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32)
+    $v0 = COPY %3(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv2i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s32) = G_CONSTANT i32 1
+    %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 2 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv2i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 2 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv2i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv2i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s32) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s32)
+    %1:_(s32) = COPY $x11
+    %4:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %3:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32)
+    $v0 = COPY %3(<vscale x 2 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s32) = G_CONSTANT i32 2
+    %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i1_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(s32) = COPY $x10
+    %0:_(s1) = G_TRUNC %1(s32)
+    %3:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %3, %0(s1), %4(s32)
+    $v0 = COPY %2(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv8i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 8 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv8i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 8 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv8i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv8i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s32) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s32)
+    %1:_(s32) = COPY $x11
+    %4:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %3:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32)
+    $v0 = COPY %3(<vscale x 8 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv16i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s32) = G_CONSTANT i32 15
+    %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 16 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv16i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s32)
+    $v0 = COPY %0(<vscale x 16 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv16i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv16i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[COPY1]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s32) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s32)
+    %1:_(s32) = COPY $x11
+    %4:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    %3:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %4, %0(s1), %1(s32)
+    $v0 = COPY %3(<vscale x 16 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_3
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $v0, $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i1_3
+    ; CHECK: liveins: $v0, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s1), [[C]](s32)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %0:_(<vscale x 4 x s1>) = COPY $v0
+    %2:_(s32) = COPY $x10
+    %1:_(s1) = G_TRUNC %2(s32)
+    %4:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %0, %1(s1), %4(s32)
+    $v0 = COPY %3(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv1i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s32)
+    %3:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32)
+    $v8 = COPY %2(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 2 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 2 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s32)
+    %3:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32)
+    $v8 = COPY %2(<vscale x 2 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s32)
+    %3:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32)
+    $v8 = COPY %2(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 8 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8 = COPY %0(<vscale x 8 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s32)
+    %3:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s32)
+    $v8 = COPY %2(<vscale x 8 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv16i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8m2 = COPY %0(<vscale x 16 x s8>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv16i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s32)
+    $v8m2 = COPY %0(<vscale x 16 x s8>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv16i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11, $x12
+
+    ; CHECK-LABEL: name: insertelement_nxv16i8_2
+    ; CHECK: liveins: $x10, $x11, $x12
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[COPY1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %2:_(s32) = COPY $x10
+    %0:_(s8) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $x11
+    %4:_(s32) = COPY $x12
+    %1:_(s64) = G_MERGE_VALUES %3(s32), %4(s32)
+    %6:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    %7:_(s32) = G_TRUNC %1(s64)
+    %5:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %6, %0(s8), %7(s32)
+    $v8m2 = COPY %5(<vscale x 16 x s8>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i8_3
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $v8, $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i8_3
+    ; CHECK: liveins: $v8, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s8), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s8>) = COPY $v8
+    %2:_(s32) = COPY $x10
+    %1:_(s8) = G_TRUNC %2(s32)
+    %4:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %0, %1(s8), %4(s32)
+    $v8 = COPY %3(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s32)
+    %3:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32)
+    $v8 = COPY %2(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s32) = G_CONSTANT i32 1
+    %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8 = COPY %0(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8 = COPY %0(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s32)
+    %3:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32)
+    $v8 = COPY %2(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8 = COPY %0(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8 = COPY %0(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s32)
+    %3:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32)
+    $v8 = COPY %2(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8m2 = COPY %0(<vscale x 8 x s16>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv8i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8m2 = COPY %0(<vscale x 8 x s16>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv8i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(s32) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s32)
+    %3:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32)
+    $v8m2 = COPY %2(<vscale x 8 x s16>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv16i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8m4 = COPY %0(<vscale x 16 x s16>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv16i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s32)
+    $v8m4 = COPY %0(<vscale x 16 x s16>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv16i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv16i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(s32) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s32)
+    %3:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    %4:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s32)
+    $v8m4 = COPY %2(<vscale x 16 x s16>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv4i16
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $v8, $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i16
+    ; CHECK: liveins: $v8, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s16>) = COPY $v8
+    %2:_(s32) = COPY $x10
+    %1:_(s16) = G_TRUNC %2(s32)
+    %4:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %0, %1(s16), %4(s32)
+    $v8 = COPY %3(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32)
+    $v8 = COPY %0(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(s32) = COPY $x10
+    %2:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    $v8 = COPY %1(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32)
+    $v8 = COPY %0(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32)
+    $v8 = COPY %0(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(s32) = COPY $x10
+    %2:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    $v8 = COPY %1(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32)
+    $v8m2 = COPY %0(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32)
+    $v8m2 = COPY %0(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %0:_(s32) = COPY $x10
+    %2:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    $v8m2 = COPY %1(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv8i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32)
+    $v8m4 = COPY %0(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv8i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32)
+    $v8m4 = COPY %0(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv8i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %0:_(s32) = COPY $x10
+    %2:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    $v8m4 = COPY %1(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv16i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %2(s32)
+    $v8m8 = COPY %0(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv16i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s32)
+    $v8m8 = COPY %0(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv16i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv16i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %0:_(s32) = COPY $x10
+    %2:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    $v8m8 = COPY %1(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv4i32
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $v8m2
+
+    ; CHECK-LABEL: name: insertelement_nxv4i32
+    ; CHECK: liveins: $x10, $v8m2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %0:_(<vscale x 4 x s32>) = COPY $v8m2
+    %1:_(s32) = COPY $x10
+    %3:_(s32) = G_CONSTANT i32 0
+    %2:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %3(s32)
+    $v8m2 = COPY %2(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv1i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8 = COPY %0(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv1i64_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s32) = COPY $x10
+    %2:_(s32) = COPY $x11
+    %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    %5:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32)
+    $v8 = COPY %3(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8m2 = COPY %0(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv2i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8m2 = COPY %0(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv2i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv2i64_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(s32) = COPY $x10
+    %2:_(s32) = COPY $x11
+    %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    %5:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32)
+    $v8m2 = COPY %3(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8m4 = COPY %0(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv4i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8m4 = COPY %0(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv4i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv4i64_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(s32) = COPY $x10
+    %2:_(s32) = COPY $x11
+    %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    %5:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32)
+    $v8m4 = COPY %3(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv8i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8m8 = COPY %0(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv8i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C1]](s32)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s32) = G_CONSTANT i32 0
+    %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s32)
+    $v8m8 = COPY %0(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv8i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv8i64_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(s32) = COPY $x10
+    %2:_(s32) = COPY $x11
+    %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %5:_(s32) = G_CONSTANT i32 0
+    %3:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %4, %0(s64), %5(s32)
+    $v8m8 = COPY %3(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv4i64
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11, $v8m4
+
+    ; CHECK-LABEL: name: insertelement_nxv4i64
+    ; CHECK: liveins: $x10, $x11, $v8m4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s64>) = COPY $v8m4
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[MV]](s64), [[C]](s32)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %0:_(<vscale x 4 x s64>) = COPY $v8m4
+    %2:_(s32) = COPY $x10
+    %3:_(s32) = COPY $x11
+    %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %5:_(s32) = G_CONSTANT i32 0
+    %4:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %0, %1(s64), %5(s32)
+    $v8m4 = COPY %4(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir
new file mode 100644
index 0000000..4c33ddc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir
@@ -0,0 +1,1731 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name:            insertelement_nxv1i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv1i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv1i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv1i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s64) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s64)
+    %3:_(s64) = COPY $x11
+    %1:_(s32) = G_TRUNC %3(s64)
+    %5:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+    %6:_(s64) = G_ZEXT %1(s32)
+    %4:_(<vscale x 1 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64)
+    $v0 = COPY %4(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv2i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_CONSTANT i64 1
+    %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 2 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv2i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 2 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv2i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv2i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 2 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s64) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s64)
+    %3:_(s64) = COPY $x11
+    %1:_(s32) = G_TRUNC %3(s64)
+    %5:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %6:_(s64) = G_ZEXT %1(s32)
+    %4:_(<vscale x 2 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64)
+    $v0 = COPY %4(<vscale x 2 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_CONSTANT i64 2
+    %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i1_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(s64) = COPY $x10
+    %0:_(s1) = G_TRUNC %1(s64)
+    %3:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %3, %0(s1), %4(s64)
+    $v0 = COPY %2(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv8i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 8 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv8i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 8 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv8i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv8i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 8 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s64) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s64)
+    %3:_(s64) = COPY $x11
+    %1:_(s32) = G_TRUNC %3(s64)
+    %5:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+    %6:_(s64) = G_ZEXT %1(s32)
+    %4:_(<vscale x 8 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64)
+    $v0 = COPY %4(<vscale x 8 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv16i1_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i1_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 false
+    %3:_(s64) = G_CONSTANT i64 15
+    %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 16 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv16i1_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i1_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C1]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    %2:_(s1) = G_CONSTANT i1 true
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %1, %2(s1), %3(s64)
+    $v0 = COPY %0(<vscale x 16 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv16i1_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv16i1_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[AND]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 16 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %2:_(s64) = COPY $x10
+    %0:_(s1) = G_TRUNC %2(s64)
+    %3:_(s64) = COPY $x11
+    %1:_(s32) = G_TRUNC %3(s64)
+    %5:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+    %6:_(s64) = G_ZEXT %1(s32)
+    %4:_(<vscale x 16 x s1>) = G_INSERT_VECTOR_ELT %5, %0(s1), %6(s64)
+    $v0 = COPY %4(<vscale x 16 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv4i1_3
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $v0, $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i1_3
+    ; CHECK: liveins: $v0, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s1), [[C]](s64)
+    ; CHECK-NEXT: $v0 = COPY [[IVEC]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %0:_(<vscale x 4 x s1>) = COPY $v0
+    %2:_(s64) = COPY $x10
+    %1:_(s1) = G_TRUNC %2(s64)
+    %4:_(s64) = G_CONSTANT i64 0
+    %3:_(<vscale x 4 x s1>) = G_INSERT_VECTOR_ELT %0, %1(s1), %4(s64)
+    $v0 = COPY %3(<vscale x 4 x s1>)
+    PseudoRET implicit $v0
+...
+---
+name:            insertelement_nxv1i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s64)
+    %3:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 1 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64)
+    $v8 = COPY %2(<vscale x 1 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 2 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 2 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s64)
+    %3:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 2 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64)
+    $v8 = COPY %2(<vscale x 2 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s64)
+    %3:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64)
+    $v8 = COPY %2(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 8 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8 = COPY %0(<vscale x 8 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i8_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 8 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s8) = G_TRUNC %1(s64)
+    %3:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 8 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %4(s64)
+    $v8 = COPY %2(<vscale x 8 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv16i8_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i8_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8m2 = COPY %0(<vscale x 16 x s8>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv16i8_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i8_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %1, %2(s8), %3(s64)
+    $v8m2 = COPY %0(<vscale x 16 x s8>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv16i8_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: insertelement_nxv16i8_2
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[COPY1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 16 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %2:_(s64) = COPY $x10
+    %0:_(s8) = G_TRUNC %2(s64)
+    %1:_(s64) = COPY $x11
+    %4:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+    %3:_(<vscale x 16 x s8>) = G_INSERT_VECTOR_ELT %4, %0(s8), %1(s64)
+    $v8m2 = COPY %3(<vscale x 16 x s8>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i8_3
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $v8, $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i8_3
+    ; CHECK: liveins: $v8, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s8), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s8>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s8>) = COPY $v8
+    %2:_(s64) = COPY $x10
+    %1:_(s8) = G_TRUNC %2(s64)
+    %4:_(s64) = G_CONSTANT i64 0
+    %3:_(<vscale x 4 x s8>) = G_INSERT_VECTOR_ELT %0, %1(s8), %4(s64)
+    $v8 = COPY %3(<vscale x 4 x s8>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s64)
+    %3:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 1 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64)
+    $v8 = COPY %2(<vscale x 1 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8 = COPY %0(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8 = COPY %0(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s64)
+    %3:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 2 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64)
+    $v8 = COPY %2(<vscale x 2 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8 = COPY %0(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8 = COPY %0(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s64)
+    %3:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64)
+    $v8 = COPY %2(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv8i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8m2 = COPY %0(<vscale x 8 x s16>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv8i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8m2 = COPY %0(<vscale x 8 x s16>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv8i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 8 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(s64) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s64)
+    %3:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 8 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64)
+    $v8m2 = COPY %2(<vscale x 8 x s16>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv16i16_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i16_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8m4 = COPY %0(<vscale x 16 x s16>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv16i16_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i16_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    %2:_(s16) = G_CONSTANT i16 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %1, %2(s16), %3(s64)
+    $v8m4 = COPY %0(<vscale x 16 x s16>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv16i16_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv16i16_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 16 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(s64) = COPY $x10
+    %0:_(s16) = G_TRUNC %1(s64)
+    %3:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 16 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %4(s64)
+    $v8m4 = COPY %2(<vscale x 16 x s16>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv4i16
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $v8, $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i16
+    ; CHECK: liveins: $v8, $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 4 x s16>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s16>) = COPY $v8
+    %2:_(s64) = COPY $x10
+    %1:_(s16) = G_TRUNC %2(s64)
+    %4:_(s64) = G_CONSTANT i64 0
+    %3:_(<vscale x 4 x s16>) = G_INSERT_VECTOR_ELT %0, %1(s16), %4(s64)
+    $v8 = COPY %3(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s32) = G_TRUNC %1(s64)
+    %3:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 1 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64)
+    $v8 = COPY %2(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8 = COPY %0(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8 = COPY %0(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 2 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(s64) = COPY $x10
+    %0:_(s32) = G_TRUNC %1(s64)
+    %3:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64)
+    $v8 = COPY %2(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv4i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8m2 = COPY %0(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8m2 = COPY %0(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(s64) = COPY $x10
+    %0:_(s32) = G_TRUNC %1(s64)
+    %3:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64)
+    $v8m2 = COPY %2(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv8i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8m4 = COPY %0(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv8i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8m4 = COPY %0(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv8i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 8 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(s64) = COPY $x10
+    %0:_(s32) = G_TRUNC %1(s64)
+    %3:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 8 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64)
+    $v8m4 = COPY %2(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv16i32_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i32_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8m8 = COPY %0(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv16i32_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv16i32_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C1]](s64)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %1, %2(s32), %3(s64)
+    $v8m8 = COPY %0(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv16i32_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv16i32_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 16 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(s64) = COPY $x10
+    %0:_(s32) = G_TRUNC %1(s64)
+    %3:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+    %4:_(s64) = G_CONSTANT i64 0
+    %2:_(<vscale x 16 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %4(s64)
+    $v8m8 = COPY %2(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv4i32
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $v8m2
+
+    ; CHECK-LABEL: name: insertelement_nxv4i32
+    ; CHECK: liveins: $x10, $v8m2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s32), [[C]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 4 x s32>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %0:_(<vscale x 4 x s32>) = COPY $v8m2
+    %2:_(s64) = COPY $x10
+    %1:_(s32) = G_TRUNC %2(s64)
+    %4:_(s64) = G_CONSTANT i64 0
+    %3:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %4(s64)
+    $v8m2 = COPY %3(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv1i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64)
+    $v8 = COPY %0(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv1i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64)
+    $v8 = COPY %0(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv1i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv1i64_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8 = COPY [[IVEC]](<vscale x 1 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(s64) = COPY $x10
+    %2:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    %3:_(s64) = G_CONSTANT i64 0
+    %1:_(<vscale x 1 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
+    $v8 = COPY %1(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+...
+---
+name:            insertelement_nxv2i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64)
+    $v8m2 = COPY %0(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv2i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv2i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64)
+    $v8m2 = COPY %0(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv2i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv2i64_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8m2 = COPY [[IVEC]](<vscale x 2 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m2
+    %0:_(s64) = COPY $x10
+    %2:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s64) = G_CONSTANT i64 0
+    %1:_(<vscale x 2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
+    $v8m2 = COPY %1(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+...
+---
+name:            insertelement_nxv4i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64)
+    $v8m4 = COPY %0(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv4i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv4i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64)
+    $v8m4 = COPY %0(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv4i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv4i64_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8m4 = COPY [[IVEC]](<vscale x 4 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m4
+    %0:_(s64) = COPY $x10
+    %2:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    %3:_(s64) = G_CONSTANT i64 0
+    %1:_(<vscale x 4 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
+    $v8m4 = COPY %1(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name:            insertelement_nxv8i64_0
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i64_0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %2(s64)
+    $v8m8 = COPY %0(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv8i64_1
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insertelement_nxv8i64_1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %2:_(s64) = G_CONSTANT i64 -1
+    %3:_(s64) = G_CONSTANT i64 0
+    %0:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %1, %2(s64), %3(s64)
+    $v8m8 = COPY %0(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
+---
+name:            insertelement_nxv8i64_2
+legalized:       false
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: insertelement_nxv8i64_2
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
+    ; CHECK-NEXT: $v8m8 = COPY [[IVEC]](<vscale x 8 x s64>)
+    ; CHECK-NEXT: PseudoRET implicit $v8m8
+    %0:_(s64) = COPY $x10
+    %2:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    %3:_(s64) = G_CONSTANT i64 0
+    %1:_(<vscale x 8 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
+    $v8m8 = COPY %1(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
diff --git a/llvm/test/CodeGen/RISCV/branch-rel.mir b/llvm/test/CodeGen/RISCV/branch-rel.mir
new file mode 100644
index 0000000..1ed5f57
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/branch-rel.mir
@@ -0,0 +1,39 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc %s -mtriple=riscv64 -run-pass=branch-relaxation -o - -verify-machineinstrs | FileCheck %s
+
+--- |
+  define void @foo() {
+    ret void
+  }
+...
+---
+name:            foo
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   PseudoBR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &".space 4096", 1 /* sideeffect attdialect */
+  ; CHECK-NEXT:   BGE $x1, $x0, %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0:
+    liveins: $x1
+    BNE $x1, $x0, %bb.3
+    PseudoBR %bb.3
+  bb.1:
+    liveins: $x1
+    INLINEASM &".space 4096", 1
+    BGE $x1, $x0, %bb.3
+  bb.3:
+    PseudoRET
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
index c2b4494..11e7e5c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll
@@ -1,16 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
-; CHECK-LABEL: mul_v16i8
-; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1
-; CHECK: vector.body:
-; CHECK: %index = phi i32
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[ELEMS]])
-; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 16
-; CHECK: [[LD0:%[^ ]+]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr {{.*}}, i32 4, <16 x i1> [[VCTP]], <16 x i8> undef)
-; CHECK: [[LD1:%[^ ]+]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr {{.*}}, i32 4, <16 x i1> [[VCTP]], <16 x i8> undef)
-; CHECK: tail call void @llvm.masked.store.v16i8.p0(<16 x i8> {{.*}}, ptr {{.*}}, i32 4, <16 x i1> [[VCTP]])
 define dso_local arm_aapcs_vfpcc void @mul_v16i8(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mul_v16i8(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 15
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 4
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -16
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 4
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.vctp8(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 16
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef)
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <16 x i8> [[WIDE_MASKED_LOAD2]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v16i8.p0(<16 x i8> [[MUL]], ptr [[TMP6]], i32 4, <16 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 16
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 15
@@ -45,17 +70,41 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: mul_v8i16
-; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1
-; CHECK: vector.body:
-; CHECK: %index = phi i32
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[ELEMS]])
-; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 8
-; CHECK: [[LD0:%[^ ]+]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-; CHECK: [[LD1:%[^ ]+]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-; CHECK: tail call void @llvm.masked.store.v8i16.p0(<8 x i16> {{.*}}, ptr {{.*}}, i32 4, <8 x i1> [[VCTP]])
 define dso_local arm_aapcs_vfpcc void @mul_v8i16(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mul_v8i16(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 7
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 3
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -8
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 8
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef)
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <8 x i16> [[WIDE_MASKED_LOAD2]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v8i16.p0(<8 x i16> [[MUL]], ptr [[TMP6]], i32 4, <8 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 7
@@ -90,16 +139,41 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: mul_v4i32
-; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1
-; CHECK: vector.body:
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]])
-; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4
-; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> [[VCTP]])
 define dso_local arm_aapcs_vfpcc void @mul_v4i32(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mul_v4i32(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -4
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD2]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[MUL]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 3
@@ -134,17 +208,47 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: split_vector
-; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1
-; CHECK: vector.body:
-; CHECK: %index = phi i32
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]])
-; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4
-; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> [[VCTP]])
 define dso_local arm_aapcs_vfpcc void @split_vector(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @split_vector(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -4
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[EXTRACT_1_LOW:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACT_1_HIGH:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[EXTRACT_2_LOW:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD2]], <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACT_2_HIGH:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD2]], <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <2 x i32> [[EXTRACT_1_LOW]], [[EXTRACT_2_LOW]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> [[EXTRACT_1_HIGH]], [[EXTRACT_2_HIGH]]
+; CHECK-NEXT:    [[COMBINE:%.*]] = shufflevector <2 x i32> [[MUL]], <2 x i32> [[SUB]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[COMBINE]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 3
@@ -186,14 +290,48 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 ; One of the loads now uses ult predicate.
-; CHECK-LABEL: mismatch_load_pred
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]])
-; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4
-; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> %wrong, <4 x i32> undef)
-; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> [[VCTP]])
 define dso_local arm_aapcs_vfpcc void @mismatch_load_pred(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mismatch_load_pred(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -4
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 4
+; CHECK-NEXT:    [[WRONG:%.*]] = icmp ult <4 x i32> [[INDUCTION]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD12:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[WRONG]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP5:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 3
@@ -236,17 +374,48 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 }
 
 ; The store now uses ult predicate.
-; CHECK-LABEL: mismatch_store_pred
-; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1
-; CHECK: vector.body:
-; CHECK: %index = phi i32
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[REMAINING:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELEMS]])
-; CHECK: [[REMAINING]] = sub i32 [[ELEMS]], 4
-; CHECK: [[LD0:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: [[LD1:%[^ ]+]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]], <4 x i32> undef)
-; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> %wrong)
 define dso_local arm_aapcs_vfpcc void @mismatch_store_pred(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define dso_local arm_aapcs_vfpcc void @mismatch_store_pred(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -4
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 4
+; CHECK-NEXT:    [[WRONG:%.*]] = icmp ult <4 x i32> [[INDUCTION]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD12:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP5:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP6]], i32 4, <4 x i1> [[WRONG]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 3
@@ -294,14 +463,72 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
 ;
 ;   Step value 16 doesn't match vector width 4
 ;
-; CHECK-LABEL: interleave4
-; CHECK: vector.body:
-; CHECK:  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N)
-; CHECK:  %active.lane.mask{{.*}} = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %v7, i32 %N)
-; CHECK:  %active.lane.mask{{.*}} = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %v8, i32 %N)
-; CHECK:  %active.lane.mask{{.*}} = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %v9, i32 %N)
-;
 define dso_local void @interleave4(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @interleave4(
+; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[N]], 15
+; CHECK-NEXT:    [[V1:%.*]] = lshr i32 [[V0]], 4
+; CHECK-NEXT:    [[V2:%.*]] = shl nuw i32 [[V1]], 4
+; CHECK-NEXT:    [[V3:%.*]] = add i32 [[V2]], -16
+; CHECK-NEXT:    [[V4:%.*]] = lshr i32 [[V3]], 4
+; CHECK-NEXT:    [[V5:%.*]] = add nuw nsw i32 [[V4]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, ptr [[A]], i32 8
+; CHECK-NEXT:    [[SCEVGEP30:%.*]] = getelementptr i32, ptr [[C]], i32 8
+; CHECK-NEXT:    [[SCEVGEP37:%.*]] = getelementptr i32, ptr [[B]], i32 8
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[V5]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV38:%.*]] = phi ptr [ [[SCEVGEP39:%.*]], %[[VECTOR_BODY]] ], [ [[SCEVGEP37]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV31:%.*]] = phi ptr [ [[SCEVGEP32:%.*]], %[[VECTOR_BODY]] ], [ [[SCEVGEP30]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP25:%.*]], %[[VECTOR_BODY]] ], [ [[SCEVGEP]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[V14:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[V6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[V15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
+; CHECK-NEXT:    [[V7:%.*]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK15:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[V7]], i32 [[N]])
+; CHECK-NEXT:    [[V8:%.*]] = add i32 [[V7]], 4
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK16:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[V8]], i32 [[N]])
+; CHECK-NEXT:    [[V9:%.*]] = add i32 [[V8]], 4
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK17:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[V9]], i32 [[N]])
+; CHECK-NEXT:    [[SCEVGEP42:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV38]], i32 -2
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[SCEVGEP42]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[SCEVGEP43:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV38]], i32 -1
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP43]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK15]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[LSR_IV38]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK16]], <4 x i32> undef)
+; CHECK-NEXT:    [[SCEVGEP41:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV38]], i32 1
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP41]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK17]], <4 x i32> undef)
+; CHECK-NEXT:    [[SCEVGEP34:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV31]], i32 -2
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD21:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[SCEVGEP34]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[SCEVGEP35:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV31]], i32 -1
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD22:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP35]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK15]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD23:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[LSR_IV31]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK16]], <4 x i32> undef)
+; CHECK-NEXT:    [[SCEVGEP36:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV31]], i32 1
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD24:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull [[SCEVGEP36]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK17]], <4 x i32> undef)
+; CHECK-NEXT:    [[V10:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD21]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[V11:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD22]], [[WIDE_MASKED_LOAD18]]
+; CHECK-NEXT:    [[V12:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD23]], [[WIDE_MASKED_LOAD19]]
+; CHECK-NEXT:    [[V13:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD24]], [[WIDE_MASKED_LOAD20]]
+; CHECK-NEXT:    [[SCEVGEP27:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV]], i32 -2
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V10]], ptr [[SCEVGEP27]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[SCEVGEP28:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV]], i32 -1
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V11]], ptr [[SCEVGEP28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK15]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V12]], ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK16]])
+; CHECK-NEXT:    [[SCEVGEP29:%.*]] = getelementptr <4 x i32>, ptr [[LSR_IV]], i32 1
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V13]], ptr [[SCEVGEP29]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK17]])
+; CHECK-NEXT:    [[SCEVGEP25]] = getelementptr i32, ptr [[LSR_IV]], i32 16
+; CHECK-NEXT:    [[SCEVGEP32]] = getelementptr i32, ptr [[LSR_IV31]], i32 16
+; CHECK-NEXT:    [[SCEVGEP39]] = getelementptr i32, ptr [[LSR_IV38]], i32 16
+; CHECK-NEXT:    [[V14]] = add i32 [[V9]], 4
+; CHECK-NEXT:    [[V15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[V6]], i32 1)
+; CHECK-NEXT:    [[V16:%.*]] = icmp ne i32 [[V15]], 0
+; CHECK-NEXT:    br i1 [[V16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp sgt i32 %N, 0
   %v0 = add i32 %N, 15
@@ -370,12 +597,42 @@ for.cond.cleanup:
   ret void
 }
 
-; CHECK-LABEL: const_expected_in_set_loop
-; CHECK:       call <4 x i1> @llvm.get.active.lane.mask
-; CHECK-NOT:   vctp
-; CHECK:       ret void
-;
 define dso_local void @const_expected_in_set_loop(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @const_expected_in_set_loop(
+; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], -4
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV17:%.*]] = phi ptr [ [[SCEVGEP18:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], %[[VECTOR_BODY]] ], [ [[C]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 42)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD12:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV14]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP7]], ptr [[LSR_IV17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[SCEVGEP18]] = getelementptr i32, ptr [[LSR_IV17]], i32 4
+; CHECK-NEXT:    [[TMP8]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1)
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp sgt i32 %N, 0
   %0 = add i32 %N, 3
@@ -413,12 +670,42 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: tripcount_arg_not_invariant
-; CHECK:       call <4 x i1> @llvm.get.active.lane.mask
-; CHECK-NOT:   vctp
-; CHECK:       ret void
-;
 define dso_local void @tripcount_arg_not_invariant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @tripcount_arg_not_invariant(
+; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], -4
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV17:%.*]] = phi ptr [ [[SCEVGEP18:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], %[[VECTOR_BODY]] ], [ [[C]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[INDEX]])
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD12:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV14]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP7]], ptr [[LSR_IV17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[SCEVGEP18]] = getelementptr i32, ptr [[LSR_IV17]], i32 4
+; CHECK-NEXT:    [[TMP8]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1)
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[VECTOR_BODY]], label %[[VECTOR_PH]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp sgt i32 %N, 0
   %0 = add i32 %N, 3
@@ -458,12 +745,42 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: addrec_base_not_zero
-; CHECK:       call <4 x i1> @llvm.get.active.lane.mask
-; CHECK-NOT:   vctp
-; CHECK:       ret void
-;
 define dso_local void @addrec_base_not_zero(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @addrec_base_not_zero(
+; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias readonly captures(none) [[C:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], -4
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[VECTOR_PH:.*]], label %[[FOR_COND_CLEANUP:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV17:%.*]] = phi ptr [ [[SCEVGEP18:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], %[[VECTOR_BODY]] ], [ [[C]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 1, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD12:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV14]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD12]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP7]], ptr [[LSR_IV17]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[SCEVGEP18]] = getelementptr i32, ptr [[LSR_IV17]], i32 4
+; CHECK-NEXT:    [[TMP8]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1)
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[VECTOR_BODY]], label %[[VECTOR_PH]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp sgt i32 %N, 0
   %0 = add i32 %N, 3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
index fa6a66b..9775cf9 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll
@@ -1,15 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name INST --version 6
 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
 
-; CHECK-LABEL: reduction_i32
-; CHECK: phi i32 [ 0, %vector.ph ]
-; CHECK: phi <8 x i16> [ zeroinitializer, %vector.ph ]
-; CHECK: phi i32
-; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]])
-; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8
-; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp5, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
 define i16 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, i32 %N) {
+; CHECK-LABEL: define i16 @reduction_i32(
+; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N_VEC]], -8
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INSTTMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP4]])
+; CHECK-NEXT:    [[TMP6]] = sub i32 [[TMP4]], 8
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP2]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef)
+; CHECK-NEXT:    [[INSTTMP5:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP5]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef)
+; CHECK-NEXT:    [[TMP7:%.*]] = add <8 x i16> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP8]] = add <8 x i16> [[TMP7]], [[WIDE_MASKED_LOAD3]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP9]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1)
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[VEC_PHI_LCSSA:%.*]] = phi <8 x i16> [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTLCSSA3:%.*]] = phi <8 x i1> [ [[TMP5]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <8 x i16> [ [[TMP8]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[DOTLCSSA3]], <8 x i16> [[DOTLCSSA]], <8 x i16> [[VEC_PHI_LCSSA]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP10]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[RDX_SHUF]], [[TMP10]]
+; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX5:%.*]] = add <8 x i16> [[RDX_SHUF4]], [[BIN_RDX]]
+; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <8 x i16> [[BIN_RDX5]], <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX7:%.*]] = add <8 x i16> [[RDX_SHUF6]], [[BIN_RDX5]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i16> [[BIN_RDX7]], i32 0
+; CHECK-NEXT:    ret i16 [[TMP11]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    [[RES_0:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    ret i16 [[RES_0]]
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   br i1 %cmp8, label %for.cond.cleanup, label %vector.ph
@@ -59,16 +99,52 @@ for.cond.cleanup:
   ret i16 %res.0
 }
 
-; CHECK-LABEL: reduction_i32_with_scalar
-; CHECK: vector.body:
-; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %{{.*}}, %vector.body ]
-; CHECK: %{{.*}} = phi i32 [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
-; CHECK: [[PHI:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[PHI]])
-; CHECK: [[ELEMS]] = sub i32 [[PHI]], 8
-; CHECK: call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp2, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
 define i16 @reduction_i32_with_scalar(ptr nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr {
+; CHECK-LABEL: define i16 @reduction_i32_with_scalar(
+; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i16 [[B:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i32 [[TMP]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N_VEC]], -8
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[INSTTMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INSTTMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP4]])
+; CHECK-NEXT:    [[TMP6]] = sub i32 [[TMP4]], 8
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP2]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef)
+; CHECK-NEXT:    [[INSTTMP5:%.*]] = add <8 x i16> [[VEC_PHI]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[INSTTMP6]] = add <8 x i16> [[INSTTMP5]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP7]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1)
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[INSTTMP8:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[INSTTMP6]], <8 x i16> [[VEC_PHI]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[INSTTMP8]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[RDX_SHUF]], [[INSTTMP8]]
+; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX6:%.*]] = add <8 x i16> [[RDX_SHUF5]], [[BIN_RDX]]
+; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <8 x i16> [[BIN_RDX6]], <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <8 x i16> [[RDX_SHUF7]], [[BIN_RDX6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i16> [[BIN_RDX8]], i32 0
+; CHECK-NEXT:    ret i16 [[TMP9]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    [[RES_0:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    ret i16 [[RES_0]]
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   br i1 %cmp8, label %for.cond.cleanup, label %vector.ph
@@ -119,15 +195,46 @@ for.cond.cleanup:
 ; despite this we can still calculate a precise enough range so that the
 ; the overflow checks for get.active.active.lane.mask don't reject
 ; tail-predication.
-;
-; CHECK-LABEL: @reduction_not_guarded
-;
-; CHECK:     vector.body:
-; CHECK:     @llvm.arm.mve.vctp
-; CHECK-NOT: @llvm.get.active.lane.mask.v8i1.i32
-; CHECK:     ret
-;
 define i16 @reduction_not_guarded(ptr nocapture readonly %A, i16 %B, i32 %N) local_unnamed_addr {
+; CHECK-LABEL: define i16 @reduction_not_guarded(
+; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i16 [[B:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw nsw i32 [[TMP]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT3]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N_VEC]], -8
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP2]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[INSTTMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INSTTMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP4]])
+; CHECK-NEXT:    [[TMP6]] = sub i32 [[TMP4]], 8
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[INSTTMP2]], i32 4, <8 x i1> [[TMP5]], <8 x i16> undef)
+; CHECK-NEXT:    [[INSTTMP5:%.*]] = add <8 x i16> [[VEC_PHI]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[INSTTMP6]] = add <8 x i16> [[INSTTMP5]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP7]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1)
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[INSTTMP8:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[INSTTMP6]], <8 x i16> [[VEC_PHI]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[INSTTMP8]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[RDX_SHUF]], [[INSTTMP8]]
+; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX6:%.*]] = add <8 x i16> [[RDX_SHUF5]], [[BIN_RDX]]
+; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <8 x i16> [[BIN_RDX6]], <8 x i16> undef, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[BIN_RDX8:%.*]] = add <8 x i16> [[RDX_SHUF7]], [[BIN_RDX6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i16> [[BIN_RDX8]], i32 0
+; CHECK-NEXT:    ret i16 [[TMP9]]
+;
 entry:
   %tmp = add i32 %N, -1
   %n.rnd.up = add nuw nsw i32 %tmp, 8
@@ -166,12 +273,76 @@ middle.block:                                     ; preds = %vector.body
   ret i16 %tmp9
 }
 
-; CHECK-LABEL: @Correlation
-; CHECK:       vector.body:
-; CHECK:       @llvm.arm.mve.vctp
-; CHECK-NOT:   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask
-;
 define dso_local void @Correlation(ptr nocapture readonly %Input, ptr nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @Correlation(
+; CHECK-SAME: ptr readonly captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]], i16 signext [[SIZE:%.*]], i16 signext [[N:%.*]], i16 signext [[SCALE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[N]] to i32
+; CHECK-NEXT:    [[CMP36:%.*]] = icmp sgt i16 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP36]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END17:.*]]
+; CHECK:       [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[SIZE]] to i32
+; CHECK-NEXT:    [[CONV1032:%.*]] = zext i16 [[SCALE]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[CONV2]], 3
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV51:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], %[[FOR_END:.*]] ], [ [[TMP0]], %[[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV46:%.*]] = phi ptr [ [[SCEVGEP47:%.*]], %[[FOR_END]] ], [ [[INPUT]], %[[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT:    [[I_037:%.*]] = phi i32 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC16:%.*]], %[[FOR_END]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i32 [[I_037]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[CONV2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw i32 [[I_037]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP4]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i32 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], -4
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i32 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = add nuw nsw i32 [[TMP8]], 1
+; CHECK-NEXT:    [[CMP433:%.*]] = icmp slt i32 [[I_037]], [[CONV2]]
+; CHECK-NEXT:    br i1 [[CMP433]], label %[[VECTOR_PH:.*]], label %[[FOR_END]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP9]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV48:%.*]] = phi ptr [ [[SCEVGEP49:%.*]], %[[VECTOR_BODY]] ], [ [[LSR_IV46]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[INPUT]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP11]])
+; CHECK-NEXT:    [[TMP13]] = sub i32 [[TMP11]], 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[LSR_IV]], i32 2, <4 x i1> [[TMP12]], <4 x i16> undef)
+; CHECK-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD]] to <4 x i32>
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD42:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[LSR_IV48]], i32 2, <4 x i1> [[TMP12]], <4 x i16> undef)
+; CHECK-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[WIDE_MASKED_LOAD42]] to <4 x i32>
+; CHECK-NEXT:    [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> undef, i32 [[CONV1032]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP19:%.*]] = ashr <4 x i32> [[TMP16]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20]] = add <4 x i32> [[TMP19]], [[VEC_PHI]]
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i16, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP49]] = getelementptr i16, ptr [[LSR_IV48]], i32 4
+; CHECK-NEXT:    [[TMP21]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP10]], i32 1)
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+; CHECK-NEXT:    br i1 [[TMP22]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> [[TMP20]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]])
+; CHECK-NEXT:    br label %[[FOR_END]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP24]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = lshr i32 [[SUM_0_LCSSA]], 16
+; CHECK-NEXT:    [[CONV13:%.*]] = trunc i32 [[TMP25]] to i16
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[OUTPUT]], i32 [[I_037]]
+; CHECK-NEXT:    store i16 [[CONV13]], ptr [[ARRAYIDX14]], align 2
+; CHECK-NEXT:    [[INC16]] = add nuw nsw i32 [[I_037]], 1
+; CHECK-NEXT:    [[SCEVGEP47]] = getelementptr i16, ptr [[LSR_IV46]], i32 1
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV51]], -1
+; CHECK-NEXT:    [[EXITCOND39:%.*]] = icmp eq i32 [[INC16]], [[CONV]]
+; CHECK-NEXT:    br i1 [[EXITCOND39]], label %[[FOR_END17]], label %[[FOR_BODY]]
+; CHECK:       [[FOR_END17]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %conv = sext i16 %N to i32
   %cmp36 = icmp sgt i16 %N, 0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
index a8ad360..b54d526 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
@@ -1,8 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
-; CHECK-LABEL: expand_v8i16_v8i32
-; CHECK-NOT: call i32 @llvm.arm.mve.vctp
 define void @expand_v8i16_v8i32(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define void @expand_v8i16_v8i32(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 7
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 3
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -8
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 8
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef)
+; CHECK-NEXT:    [[EXPAND_1:%.*]] = zext <8 x i16> [[WIDE_MASKED_LOAD]] to <8 x i32>
+; CHECK-NEXT:    [[EXPAND_2:%.*]] = zext <8 x i16> [[WIDE_MASKED_LOAD2]] to <8 x i32>
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <8 x i32> [[EXPAND_2]], [[EXPAND_1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> [[MUL]], ptr [[TMP6]], i32 4, <8 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 7
@@ -39,15 +74,57 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: expand_v8i16_v4i32
-; CHECK: [[ELEMS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[ELEMS_REM:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[ELEMS]])
-; CHECK: [[ELEMS_REM]] = sub i32 [[ELEMS]], 8
-; CHECK: tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr {{.*}}, i32 4, <8 x i1> [[VCTP]], <8 x i16> undef)
-; CHECK: %store.pred = icmp ule <4 x i32> %induction.store
-; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> %store.pred)
-; CHECK: tail call void @llvm.masked.store.v4i32.p0(<4 x i32> {{.*}}, ptr {{.*}}, i32 4, <4 x i1> %store.pred)
 define void @expand_v8i16_v4i32(ptr readonly %a, ptr readonly %b, ptr %c, ptr %d, i32 %N) {
+; CHECK-LABEL: define void @expand_v8i16_v4i32(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 7
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 3
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -8
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10_STORE:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT11_STORE:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT10_STORE]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STORE_IDX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[STORE_IDX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.vctp16(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 8
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i16> undef)
+; CHECK-NEXT:    [[EXTRACT_2_LOW:%.*]] = shufflevector <8 x i16> [[WIDE_MASKED_LOAD2]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[EXTRACT_2_HIGH:%.*]] = shufflevector <8 x i16> [[WIDE_MASKED_LOAD2]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[EXPAND_1:%.*]] = zext <4 x i16> [[EXTRACT_2_LOW]] to <4 x i32>
+; CHECK-NEXT:    [[EXPAND_2:%.*]] = zext <4 x i16> [[EXTRACT_2_HIGH]] to <4 x i32>
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <4 x i32> [[EXPAND_2]], [[EXPAND_1]]
+; CHECK-NEXT:    [[SUB:%.*]] = mul nsw <4 x i32> [[EXPAND_1]], [[EXPAND_2]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT_STORE:%.*]] = insertelement <4 x i32> undef, i32 [[STORE_IDX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT_STORE:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT_STORE]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION_STORE:%.*]] = add <4 x i32> [[BROADCAST_SPLAT_STORE]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[STORE_PRED:%.*]] = icmp ule <4 x i32> [[INDUCTION_STORE]], [[BROADCAST_SPLAT11_STORE]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i32 [[STORE_IDX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[MUL]], ptr [[TMP6]], i32 4, <4 x i1> [[STORE_PRED]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[D]], i32 [[STORE_IDX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[SUB]], ptr [[GEP]], i32 4, <4 x i1> [[STORE_PRED]])
+; CHECK-NEXT:    [[STORE_IDX_NEXT]] = add i32 [[STORE_IDX]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 7
@@ -98,9 +175,43 @@ for.cond.cleanup:                                 ; preds = %vector.body, %entry
   ret void
 }
 
-; CHECK-LABEL: expand_v4i32_v4i64
-; CHECK-NOT: call i32 @llvm.arm.mve.vctp
 define void @expand_v4i32_v4i64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i32 %N) {
+; CHECK-LABEL: define void @expand_v4i32_v4i64(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], ptr noalias captures(none) [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw i32 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], -4
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP11]], 2
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP13]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2]] = sub i32 [[TMP0]], 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[EXPAND_1:%.*]] = zext <4 x i32> [[WIDE_MASKED_LOAD]] to <4 x i64>
+; CHECK-NEXT:    [[EXPAND_2:%.*]] = zext <4 x i32> [[WIDE_MASKED_LOAD2]] to <4 x i64>
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <4 x i64> [[EXPAND_2]], [[EXPAND_1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v4i64.p0(<4 x i64> [[MUL]], ptr [[TMP6]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP15]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP14]], i32 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[VECTOR_BODY]], label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %tmp8 = add i32 %N, 3
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
index ec542df..fb1a4a4 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
@@ -1,24 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 
 ; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
 
-; CHECK-LABEL: vec_mul_reduce_add
-
-; CHECK: vector.ph:
-; CHECK:  %start = call i32 @llvm.start.loop.iterations.i32
-; CHECK:  br label %vector.body
-
-; CHECK: vector.body:
-; CHECK: [[ELTS:%[^ ]+]] = phi i32 [ %N, %vector.ph ], [ [[SUB:%[^ ]+]], %vector.body ]
-; CHECK: [[VCTP:%[^ ]+]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[ELTS]])
-; CHECK: [[SUB]] = sub i32 [[ELTS]], 4
-; CHECK: call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]]
-; CHECK: call <4 x i32> @llvm.masked.load.v4i32.p0(ptr {{.*}}, i32 4, <4 x i1> [[VCTP]],
-
-; CHECK: middle.block:
-; CHECK: [[VPSEL:%[^ ]+]] = select <4 x i1> [[VCTP]],
-; CHECK: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VPSEL]])
-
 define i32 @vec_mul_reduce_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) {
+; CHECK-LABEL: define i32 @vec_mul_reduce_add(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], -4
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 1
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP5]])
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[VECTOR_BODY]] ], [ [[A]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[VECTOR_BODY]] ], [ [[B]], %[[VECTOR_PH]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[START]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[N]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP7]])
+; CHECK-NEXT:    [[TMP9]] = sub i32 [[TMP7]], 4
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV2]], i32 4, <4 x i1> [[TMP8]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD13:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP8]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_LOAD13]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP11]] = add nsw <4 x i32> [[TMP10]], [[VEC_PHI]]
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP3]] = getelementptr i32, ptr [[LSR_IV2]], i32 4
+; CHECK-NEXT:    [[TMP12]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP6]], i32 1)
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[VECTOR_BODY]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    [[RES_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i32 [[RES_0_LCSSA]]
+;
 entry:
   %cmp8 = icmp eq i32 %N, 0
   %0 = add i32 %N, 3
diff --git a/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll b/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll
new file mode 100644
index 0000000..ce7024d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/absolute-symbol-kernel-code-model.ll
@@ -0,0 +1,34 @@
+; RUN: llc --code-model=kernel < %s -asm-verbose=0 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: func_no_abs_sym
+define i64 @func_no_abs_sym() nounwind {
+  ; CHECK: movq $no_abs_sym, %rax
+  %1 = ptrtoint ptr @no_abs_sym to i64
+  ret i64 %1
+}
+
+; CHECK-LABEL: func_abs_sym
+define i64 @func_abs_sym() nounwind {
+  ; CHECK: movabsq $abs_sym, %rax
+  %1 = ptrtoint ptr @abs_sym to i64
+  ret i64 %1
+}
+
+; CHECK-LABEL: func_abs_sym_in_range
+define i64 @func_abs_sym_in_range() nounwind {
+  ;; The absolute_symbol range fits in 32 bits but we still use movabs
+  ;; since there's no benefit to using the sign extending instruction
+  ;; with absolute symbols.
+  ; CHECK: movabsq $abs_sym_in_range, %rax
+  %1 = ptrtoint ptr @abs_sym_in_range to i64
+  ret i64 %1
+}
+
+@no_abs_sym = external hidden global [0 x i8]
+@abs_sym = external hidden global [0 x i8], !absolute_symbol !0
+@abs_sym_in_range = external hidden global [0 x i8], !absolute_symbol !1
+
+!0 = !{i64 -1, i64 -1}  ;; Full range
+!1 = !{i64 -2147483648, i64 2147483648}  ;; In range
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index b2651e9..de9caa5 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -230,6 +230,24 @@ entry:
   ret void
 }
 
+define void @and_cond(i32 %a, i1 %b) {
+; CHECK-LABEL: and_cond:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    notb %sil
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testb %al, %sil
+; CHECK-NEXT:    cfcmovnel %ecx, 0
+; CHECK-NEXT:    retq
+  %is_pos = icmp sgt i32 %a, 0
+  %not_b = xor i1 %b, true
+  %cond = and i1 %not_b, %is_pos
+  %mask = insertelement <1 x i1> zeroinitializer, i1 %cond, i64 0
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr null, i32 1, <1 x i1> %mask)
+  ret void
+}
+
 define i64 @redundant_test(i64 %num, ptr %p1, i64 %in) {
 ; CHECK-LABEL: redundant_test:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll
index 2aea9c1..632d90d 100644
--- a/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll
+++ b/llvm/test/CodeGen/X86/call-graph-section-addrtaken.ll
@@ -27,7 +27,7 @@ entry:
 !1 = !{i64 0, !"_ZTSFivE.generalized"}
 !2 = !{i64 0, !"_ZTSFviE.generalized"}
 
-; CHECK: .section .callgraph,"o",@progbits,.text
+; CHECK: .section .llvm.callgraph,"o",@progbits,.text
 ;; Version
 ; CHECK-NEXT: .byte   0
 ;; Flags -- Potential indirect target so LSB is set to 1. Other bits are 0.
diff --git a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll
index 1aabf66..ed6849a 100644
--- a/llvm/test/CodeGen/X86/call-graph-section-assembly.ll
+++ b/llvm/test/CodeGen/X86/call-graph-section-assembly.ll
@@ -1,8 +1,8 @@
 ;; Test if temporary labels are generated for each indirect callsite.
-;; Test if the .callgraph section contains the MD5 hash of callees' type (type id)
+;; Test if the .llvm.callgraph section contains the MD5 hash of callees' type (type id)
 ;; is correctly paired with its corresponding temporary label generated for indirect
 ;; call sites annotated with !callee_type metadata.
-;; Test if the .callgraph section contains unique direct callees.
+;; Test if the .llvm.callgraph section contains unique direct callees.
 
 ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -o - < %s | FileCheck %s
 
@@ -36,7 +36,7 @@ entry:
 !4 = !{!5}
 !5 = !{i64 0, !"_ZTSFPvS_E.generalized"}
 
-; CHECK: .section .callgraph,"o",@progbits,.text
+; CHECK: .section .llvm.callgraph,"o",@progbits,.text
 ;; Version
 ; CHECK-NEXT: .byte   0
 ;; Flags
diff --git a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll
index 34dc5b8..49cc335 100644
--- a/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll
+++ b/llvm/test/CodeGen/X86/call-graph-section-tailcall.ll
@@ -1,7 +1,10 @@
-;; Tests that we store the type identifiers in .callgraph section of the object file for tailcalls.
+;; Tests that we store the type identifiers in .llvm.callgraph section of the object file for tailcalls.
+
+; REQUIRES: x86-registered-target
+; REQUIRES: arm-registered-target
 
 ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s
 
 define i32 @check_tailcall(ptr %func, i8 %x) !type !0 {
 entry:
@@ -27,7 +30,7 @@ declare !type !2 i32 @bar(i8 signext)
 !2 = !{i64 0, !"_ZTSFicE.generalized"}
 !3 = !{i64 0, !"_ZTSFiiE.generalized"}
 
-; CHECK: Hex dump of section '.callgraph':
+; CHECK: Hex dump of section '.llvm.callgraph':
 ; CHECK-NEXT: 0x00000000 00050000 00000000 00008e19 0b7f3326
 ; CHECK-NEXT: 0x00000010 e3000154 86bc5981 4b8e3000 05000000
 ;; Verify that the type id 0x308e4b8159bc8654 is in section.
diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll b/llvm/test/CodeGen/X86/call-graph-section.ll
index c144a24..8a1c6ca 100644
--- a/llvm/test/CodeGen/X86/call-graph-section.ll
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -1,7 +1,10 @@
-;; Tests that we store the type identifiers in .callgraph section of the object file.
+;; Tests that we store the type identifiers in .llvm.callgraph section of the object file.
+
+; REQUIRES: x86-registered-target
+; REQUIRES: arm-registered-target
 
 ; RUN: llc -mtriple=x86_64-unknown-linux --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+; RUN: llvm-readelf -x .llvm.callgraph - | FileCheck %s
 
 declare !type !0 void @foo()
 
@@ -31,7 +34,7 @@ entry:
 
 ;; Make sure following type IDs are in call graph section
 ;; 0x5eecb3e2444f731f, 0x814b8e305486bc59, 0xf897fd777ade6814
-; CHECK:      Hex dump of section '.callgraph':
+; CHECK:      Hex dump of section '.llvm.callgraph':
 ; CHECK-NEXT: 0x00000000 00050000 00000000 00000000 00000000
 ; CHECK-NEXT: 0x00000010 00000324 44f731f5 eecb3e54 86bc5981
 ; CHECK-NEXT: 0x00000020 4b8e307a de6814f8 97fd77
diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll
index 40c38c2..71253c8 100644
--- a/llvm/test/CodeGen/X86/cpus-intel.ll
+++ b/llvm/test/CodeGen/X86/cpus-intel.ll
@@ -38,6 +38,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
@@ -104,6 +105,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
index f3950b7..b2b0a6d 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
@@ -1,17 +1,101 @@
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-;; A minimal test case. Subsequent PRs will expand on this test case
-;; (e.g., with more functions, variables and profiles) and test the hotness
-;; reconcillation implementation.
+;; Requires asserts for -debug-only.
+; REQUIRES: asserts
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
+; RUN:     -partition-static-data-sections=true \
+; RUN:     -debug-only=static-data-profile-info \
+; RUN:     -data-sections=true  -unique-section-names=false \
+; RUN:     input-with-data-access-prof-on.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR
+
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
 ; RUN:     -partition-static-data-sections=true \
+; RUN:     -debug-only=static-data-profile-info \
 ; RUN:     -data-sections=true  -unique-section-names=false \
-; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefix=IR
+; RUN:     input-with-data-access-prof-off.ll -o - 2>&1 | FileCheck %s --check-prefixes=OFF
+
+; LOG: hot_bss has section prefix hot, the max from data access profiles as hot and PGO counters as hot
+; LOG: data_unknown_hotness has section prefix <empty>, the max from data access profiles as <empty> and PGO counters as unlikely
+; LOG: external_relro_array has section prefix unlikely, solely from data access profiles
+
+; IR:          .type   hot_bss,@object
+; IR-NEXT:     .section .bss.hot.,"aw"
+; IR:          .type   data_unknown_hotness,@object
+; IR-NEXT:    .section .data,"aw"
+; IR:          .type   external_relro_array,@object
+; IR-NEXT:     .section        .data.rel.ro.unlikely.,"aw"
+
+
+; OFF:        .type   hot_bss,@object
+; OFF-NEXT:   .section        .bss.hot.,"aw"
+; OFF:        .type   data_unknown_hotness,@object
+; OFF-NEXT:   .section        .data.unlikely.,"aw"
+;; Global variable section prefix metadata is not used when
+;; module flag `EnableDataAccessProf` is 0, and @external_relro_array has
+;; external linkage, so analysis based on PGO counters doesn't apply. 
+; OFF:        .type   external_relro_array,@object    # @external_relro_array
+; OFF-NEXT:   .section        .data.rel.ro,"aw"
+
+;--- input-with-data-access-prof-on.ll
+; Internal vars
+@hot_bss = internal global i32 0, !section_prefix !17
+@data_unknown_hotness = internal global i32 1
+; External vars
+@external_relro_array = constant [2 x ptr] [ptr @hot_bss, ptr @data_unknown_hotness], !section_prefix !18
+
+define void @cold_func() !prof !15 {
+  %9 = load i32, ptr @data_unknown_hotness
+  %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9)
+  ret void
+}
+
+define void @hot_func() !prof !14 {
+  %9 = load i32, ptr @hot_bss
+  %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9)
+  ret void
+}
+
+declare i32 @func_taking_arbitrary_param(...)
 
-; IR: .section .bss.hot.,"aw"
+!llvm.module.flags = !{!0, !1}
 
+!0 = !{i32 2, !"EnableDataAccessProf", i32 1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460183}
+!5 = !{!"MaxCount", i64 849024}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849024}
+!8 = !{!"NumCounts", i64 23627}
+!9 = !{!"NumFunctions", i64 3271}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13}
+!12 = !{i32 990000, i64 166, i32 73}
+!13 = !{i32 999999, i64 3, i32 1443}
+!14 = !{!"function_entry_count", i64 100000}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 1, i32 99999}
+!17 = !{!"section_prefix", !"hot"}
+!18 = !{!"section_prefix", !"unlikely"}
+
+;--- input-with-data-access-prof-off.ll
+; Same as file above except that module flag `EnableDataAccessProf` has value 0.
+; Internal vars
 @hot_bss = internal global i32 0, !section_prefix !17
+@data_unknown_hotness = internal global i32 1
+; External vars
+@external_relro_array = constant [2 x ptr] [ptr @hot_bss, ptr @data_unknown_hotness], !section_prefix !18
+
+define void @cold_func() !prof !15 {
+  %9 = load i32, ptr @data_unknown_hotness
+  %11 = call i32 (...) @func_taking_arbitrary_param(i32 %9)
+  ret void
+}
 
 define void @hot_func() !prof !14 {
   %9 = load i32, ptr @hot_bss
@@ -21,8 +105,9 @@ define void @hot_func() !prof !14 {
 
 declare i32 @func_taking_arbitrary_param(...)
 
-!llvm.module.flags = !{!1}
+!llvm.module.flags = !{!0, !1}
 
+!0 = !{i32 2, !"EnableDataAccessProf", i32 0}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
 !3 = !{!"ProfileFormat", !"InstrProf"}
@@ -40,3 +125,4 @@ declare i32 @func_taking_arbitrary_param(...)
 !15 = !{!"function_entry_count", i64 1}
 !16 = !{!"branch_weights", i32 1, i32 99999}
 !17 = !{!"section_prefix", !"hot"}
+!18 = !{!"section_prefix", !"unlikely"}
diff --git a/llvm/test/MC/X86/verify-callgraph-section.s b/llvm/test/MC/X86/verify-callgraph-section.s
index ce07228..9be5a68 100644
--- a/llvm/test/MC/X86/verify-callgraph-section.s
+++ b/llvm/test/MC/X86/verify-callgraph-section.s
@@ -2,7 +2,7 @@
 /// (annotated by generated temporary labels .Ltmp*) are associated
 /// with the corresponding callee type identifiers.
 
-// RUN: llvm-mc -triple=x86_64 -filetype=obj -o - < %s | llvm-readelf -x .callgraph - | FileCheck %s
+// RUN: llvm-mc -triple=x86_64 -filetype=obj -o - < %s | llvm-readelf -x .llvm.callgraph - | FileCheck %s
 	
 	.text
 	.globl	ball                            # -- Begin function ball
@@ -38,7 +38,7 @@ ball:                                   # @ball
 	addq	$32, %rsp	
 	popq	%rbx	
 	retq
-	.section	.callgraph,"o",@progbits,.text
+	.section	.llvm.callgraph,"o",@progbits,.text
 	.quad	0
 	.quad	.Lfunc_begin0
 	.quad	1
diff --git a/llvm/test/Transforms/Inline/ML/state-accounting-skip-non-cold.ll b/llvm/test/Transforms/Inline/ML/state-accounting-skip-non-cold.ll
new file mode 100644
index 0000000..0887f5e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/state-accounting-skip-non-cold.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; REQUIRES: llvm_inliner_model_autogenerated && asserts
+; RUN: opt -passes='default<O3>' -enable-ml-inliner=release -ml-inliner-skip-policy=if-caller-not-cold -S %s -o - | FileCheck %s
+; RUN: opt -passes='default<O3>' -ml-inliner-stop-immediately -enable-ml-inliner=release -ml-inliner-skip-policy=if-caller-not-cold -S %s -o - | FileCheck %s
+
+declare ptr @f()
+
+define void @e() #0 {
+; CHECK-LABEL: define void @e(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    tail call void @d()
+; CHECK-NEXT:    tail call void @g()
+; CHECK-NEXT:    tail call void @d()
+; CHECK-NEXT:    tail call void @g()
+; CHECK-NEXT:    tail call void @d()
+; CHECK-NEXT:    tail call void @g()
+; CHECK-NEXT:    ret void
+;
+  call void @h()
+  call void @h()
+  call void @h()
+  ret void
+}
+
+define void @d() {
+; CHECK-LABEL: define void @d() local_unnamed_addr {
+; CHECK-NEXT:    tail call void @f()
+; CHECK-NEXT:    ret void
+;
+  call void @f()
+  ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g() local_unnamed_addr {
+; CHECK-NEXT:    tail call void @f()
+; CHECK-NEXT:    ret void
+;
+  call void @f()
+  ret void
+}
+
+define void @h() #0 {
+; CHECK-LABEL: define void @h(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    tail call void @d()
+; CHECK-NEXT:    tail call void @g()
+; CHECK-NEXT:    ret void
+;
+  call void @d()
+  call void @g()
+  ret void
+}
+
+attributes #0 = { "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index ab9b48f..aff2c4c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -153,17 +153,20 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = lshr i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i8>
+; CHECK-NEXT:    [[TMP6:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i8>
 ; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP14]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i8> [[TMP6]], i32 1
 ; CHECK-NEXT:    store i8 [[TMP22]], ptr [[TMP18]], align 1
-; CHECK-NEXT:    store i8 [[TMP22]], ptr [[TMP19]], align 1
+; CHECK-NEXT:    store i8 [[TMP12]], ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
index 1533906..53dad3a 100644
--- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
+++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
@@ -74,8 +74,7 @@ exit:
   ret void
 }
 
-; FIXME: Currently this mis-compiled when interleaving; all stores store the
-; last lane of the last part, instead of the last lane per part.
+; Check each unrolled store stores the last lane of the corresponding part.
 ; Test case for https://github.com/llvm/llvm-project/issues/162498.
 define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(ptr %dst) {
 ; VF4IC1-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
@@ -121,13 +120,15 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
 ; VF2IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; VF2IC2:       [[VECTOR_BODY]]:
 ; VF2IC2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2IC2-NEXT:    [[TMP7:%.*]] = add i32 [[INDEX]], 0
+; VF2IC2-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 1
 ; VF2IC2-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 2
 ; VF2IC2-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 3
-; VF2IC2-NEXT:    [[TMP2:%.*]] = lshr i32 [[INDEX]], 1
+; VF2IC2-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP7]], 1
 ; VF2IC2-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP0]], 1
 ; VF2IC2-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
 ; VF2IC2-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]]
-; VF2IC2-NEXT:    store i32 [[TMP1]], ptr [[TMP4]], align 4
+; VF2IC2-NEXT:    store i32 [[TMP8]], ptr [[TMP4]], align 4
 ; VF2IC2-NEXT:    store i32 [[TMP1]], ptr [[TMP5]], align 4
 ; VF2IC2-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; VF2IC2-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
diff --git a/llvm/test/tools/llvm-objdump/ELF/Hexagon/packet-reset-on-label.s b/llvm/test/tools/llvm-objdump/ELF/Hexagon/packet-reset-on-label.s
new file mode 100644
index 0000000..02a52bb
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/Hexagon/packet-reset-on-label.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=hexagon -mcpu=hexagonv75 -filetype=obj %s \
+// RUN:   | llvm-objdump -d - \
+// RUN:   | FileCheck %s
+
+foo:
+  { nop }
+  /// a nop without end-of-packet bits set to simulate data that is
+  /// not a proper packet end.
+  .long 0x7f004000
+bar:
+  { nop
+    nop
+  }
+
+// CHECK-LABEL: <foo>:
+// CHECK: { nop }
+// CHECK-NEXT: { nop
+
+/// The instruction starting after <bar> should start in a new packet.
+// CHECK-LABEL: <bar>:
+// CHECK: { nop
+// CHECK-NEXT: nop }
+
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index f04b256..8b03db3 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -218,13 +218,12 @@ static cl::opt<std::string> PassPipeline(
 static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
                                cl::desc("Alias for -passes"));
 
-namespace {
-
-std::vector<std::string> &getRunPassNames() {
+static std::vector<std::string> &getRunPassNames() {
   static std::vector<std::string> RunPassNames;
   return RunPassNames;
 }
 
+namespace {
 struct RunPassOption {
   void operator=(const std::string &Val) const {
     if (Val.empty())
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index 875ec1b..7fee06b 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -92,206 +92,202 @@ static codegen::RegisterCodeGenFlags CGF;
 #define DEBUG_TYPE "lli"
 
 namespace {
-
-  enum class JITKind { MCJIT, Orc, OrcLazy };
-  enum class JITLinkerKind { Default, RuntimeDyld, JITLink };
-
-  cl::opt<std::string>
-  InputFile(cl::desc("<input bitcode>"), cl::Positional, cl::init("-"));
-
-  cl::list<std::string>
-  InputArgv(cl::ConsumeAfter, cl::desc("<program arguments>..."));
-
-  cl::opt<bool> ForceInterpreter("force-interpreter",
-                                 cl::desc("Force interpretation: disable JIT"),
-                                 cl::init(false));
-
-  cl::opt<JITKind> UseJITKind(
-      "jit-kind", cl::desc("Choose underlying JIT kind."),
-      cl::init(JITKind::Orc),
-      cl::values(clEnumValN(JITKind::MCJIT, "mcjit", "MCJIT"),
-                 clEnumValN(JITKind::Orc, "orc", "Orc JIT"),
-                 clEnumValN(JITKind::OrcLazy, "orc-lazy",
-                            "Orc-based lazy JIT.")));
-
-  cl::opt<JITLinkerKind>
-      JITLinker("jit-linker", cl::desc("Choose the dynamic linker/loader."),
-                cl::init(JITLinkerKind::Default),
-                cl::values(clEnumValN(JITLinkerKind::Default, "default",
-                                      "Default for platform and JIT-kind"),
-                           clEnumValN(JITLinkerKind::RuntimeDyld, "rtdyld",
-                                      "RuntimeDyld"),
-                           clEnumValN(JITLinkerKind::JITLink, "jitlink",
-                                      "Orc-specific linker")));
-  cl::opt<std::string> OrcRuntime("orc-runtime",
-                                  cl::desc("Use ORC runtime from given path"),
-                                  cl::init(""));
-
-  cl::opt<unsigned>
-  LazyJITCompileThreads("compile-threads",
-                        cl::desc("Choose the number of compile threads "
-                                 "(jit-kind=orc-lazy only)"),
-                        cl::init(0));
-
-  cl::list<std::string>
-  ThreadEntryPoints("thread-entry",
-                    cl::desc("calls the given entry-point on a new thread "
-                             "(jit-kind=orc-lazy only)"));
-
-  cl::opt<bool> PerModuleLazy(
-      "per-module-lazy",
-      cl::desc("Performs lazy compilation on whole module boundaries "
-               "rather than individual functions"),
-      cl::init(false));
-
-  cl::list<std::string>
-      JITDylibs("jd",
-                cl::desc("Specifies the JITDylib to be used for any subsequent "
-                         "-extra-module arguments."));
-
-  cl::list<std::string>
-      Dylibs("dlopen", cl::desc("Dynamic libraries to load before linking"));
-
-  // The MCJIT supports building for a target address space separate from
-  // the JIT compilation process. Use a forked process and a copying
-  // memory manager with IPC to execute using this functionality.
-  cl::opt<bool> RemoteMCJIT("remote-mcjit",
-    cl::desc("Execute MCJIT'ed code in a separate process."),
+enum class JITKind { MCJIT, Orc, OrcLazy };
+enum class JITLinkerKind { Default, RuntimeDyld, JITLink };
+} // namespace
+
+static cl::opt<std::string> InputFile(cl::desc("<input bitcode>"),
+                                      cl::Positional, cl::init("-"));
+
+static cl::list<std::string> InputArgv(cl::ConsumeAfter,
+                                       cl::desc("<program arguments>..."));
+
+static cl::opt<bool>
+    ForceInterpreter("force-interpreter",
+                     cl::desc("Force interpretation: disable JIT"),
+                     cl::init(false));
+
+static cl::opt<JITKind>
+    UseJITKind("jit-kind", cl::desc("Choose underlying JIT kind."),
+               cl::init(JITKind::Orc),
+               cl::values(clEnumValN(JITKind::MCJIT, "mcjit", "MCJIT"),
+                          clEnumValN(JITKind::Orc, "orc", "Orc JIT"),
+                          clEnumValN(JITKind::OrcLazy, "orc-lazy",
+                                     "Orc-based lazy JIT.")));
+
+static cl::opt<JITLinkerKind> JITLinker(
+    "jit-linker", cl::desc("Choose the dynamic linker/loader."),
+    cl::init(JITLinkerKind::Default),
+    cl::values(clEnumValN(JITLinkerKind::Default, "default",
+                          "Default for platform and JIT-kind"),
+               clEnumValN(JITLinkerKind::RuntimeDyld, "rtdyld", "RuntimeDyld"),
+               clEnumValN(JITLinkerKind::JITLink, "jitlink",
+                          "Orc-specific linker")));
+static cl::opt<std::string>
+    OrcRuntime("orc-runtime", cl::desc("Use ORC runtime from given path"),
+               cl::init(""));
+
+static cl::opt<unsigned>
+    LazyJITCompileThreads("compile-threads",
+                          cl::desc("Choose the number of compile threads "
+                                   "(jit-kind=orc-lazy only)"),
+                          cl::init(0));
+
+static cl::list<std::string>
+    ThreadEntryPoints("thread-entry",
+                      cl::desc("calls the given entry-point on a new thread "
+                               "(jit-kind=orc-lazy only)"));
+
+static cl::opt<bool> PerModuleLazy(
+    "per-module-lazy",
+    cl::desc("Performs lazy compilation on whole module boundaries "
+             "rather than individual functions"),
     cl::init(false));
 
-  // Manually specify the child process for remote execution. This overrides
-  // the simulated remote execution that allocates address space for child
-  // execution. The child process will be executed and will communicate with
-  // lli via stdin/stdout pipes.
-  cl::opt<std::string>
-  ChildExecPath("mcjit-remote-process",
-                cl::desc("Specify the filename of the process to launch "
-                         "for remote MCJIT execution.  If none is specified,"
-                         "\n\tremote execution will be simulated in-process."),
-                cl::value_desc("filename"), cl::init(""));
-
-  // Determine optimization level.
-  cl::opt<char> OptLevel("O",
-                         cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
-                                  "(default = '-O2')"),
-                         cl::Prefix, cl::init('2'));
-
-  cl::opt<std::string>
-  TargetTriple("mtriple", cl::desc("Override target triple for module"));
-
-  cl::opt<std::string>
-  EntryFunc("entry-function",
-            cl::desc("Specify the entry function (default = 'main') "
-                     "of the executable"),
-            cl::value_desc("function"),
-            cl::init("main"));
-
-  cl::list<std::string>
-  ExtraModules("extra-module",
-         cl::desc("Extra modules to be loaded"),
-         cl::value_desc("input bitcode"));
-
-  cl::list<std::string>
-  ExtraObjects("extra-object",
-         cl::desc("Extra object files to be loaded"),
-         cl::value_desc("input object"));
-
-  cl::list<std::string>
-  ExtraArchives("extra-archive",
-         cl::desc("Extra archive files to be loaded"),
-         cl::value_desc("input archive"));
-
-  cl::opt<bool>
-  EnableCacheManager("enable-cache-manager",
-        cl::desc("Use cache manager to save/load modules"),
-        cl::init(false));
-
-  cl::opt<std::string>
-  ObjectCacheDir("object-cache-dir",
-                  cl::desc("Directory to store cached object files "
-                           "(must be user writable)"),
-                  cl::init(""));
-
-  cl::opt<std::string>
-  FakeArgv0("fake-argv0",
-            cl::desc("Override the 'argv[0]' value passed into the executing"
-                     " program"), cl::value_desc("executable"));
-
-  cl::opt<bool>
-  DisableCoreFiles("disable-core-files", cl::Hidden,
-                   cl::desc("Disable emission of core files if possible"));
-
-  cl::opt<bool>
-  NoLazyCompilation("disable-lazy-compilation",
-                  cl::desc("Disable JIT lazy compilation"),
-                  cl::init(false));
-
-  cl::opt<bool>
-  GenerateSoftFloatCalls("soft-float",
-    cl::desc("Generate software floating point library calls"),
+static cl::list<std::string>
+    JITDylibs("jd",
+              cl::desc("Specifies the JITDylib to be used for any subsequent "
+                       "-extra-module arguments."));
+
+static cl::list<std::string>
+    Dylibs("dlopen", cl::desc("Dynamic libraries to load before linking"));
+
+// The MCJIT supports building for a target address space separate from
+// the JIT compilation process. Use a forked process and a copying
+// memory manager with IPC to execute using this functionality.
+static cl::opt<bool>
+    RemoteMCJIT("remote-mcjit",
+                cl::desc("Execute MCJIT'ed code in a separate process."),
+                cl::init(false));
+
+// Manually specify the child process for remote execution. This overrides
+// the simulated remote execution that allocates address space for child
+// execution. The child process will be executed and will communicate with
+// lli via stdin/stdout pipes.
+static cl::opt<std::string> ChildExecPath(
+    "mcjit-remote-process",
+    cl::desc("Specify the filename of the process to launch "
+             "for remote MCJIT execution.  If none is specified,"
+             "\n\tremote execution will be simulated in-process."),
+    cl::value_desc("filename"), cl::init(""));
+
+// Determine optimization level.
+static cl::opt<char>
+    OptLevel("O",
+             cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+                      "(default = '-O2')"),
+             cl::Prefix, cl::init('2'));
+
+static cl::opt<std::string>
+    TargetTriple("mtriple", cl::desc("Override target triple for module"));
+
+static cl::opt<std::string>
+    EntryFunc("entry-function",
+              cl::desc("Specify the entry function (default = 'main') "
+                       "of the executable"),
+              cl::value_desc("function"), cl::init("main"));
+
+static cl::list<std::string>
+    ExtraModules("extra-module", cl::desc("Extra modules to be loaded"),
+                 cl::value_desc("input bitcode"));
+
+static cl::list<std::string>
+    ExtraObjects("extra-object", cl::desc("Extra object files to be loaded"),
+                 cl::value_desc("input object"));
+
+static cl::list<std::string>
+    ExtraArchives("extra-archive", cl::desc("Extra archive files to be loaded"),
+                  cl::value_desc("input archive"));
+
+static cl::opt<bool>
+    EnableCacheManager("enable-cache-manager",
+                       cl::desc("Use cache manager to save/load modules"),
+                       cl::init(false));
+
+static cl::opt<std::string>
+    ObjectCacheDir("object-cache-dir",
+                   cl::desc("Directory to store cached object files "
+                            "(must be user writable)"),
+                   cl::init(""));
+
+static cl::opt<std::string>
+    FakeArgv0("fake-argv0",
+              cl::desc("Override the 'argv[0]' value passed into the executing"
+                       " program"),
+              cl::value_desc("executable"));
+
+static cl::opt<bool>
+    DisableCoreFiles("disable-core-files", cl::Hidden,
+                     cl::desc("Disable emission of core files if possible"));
+
+static cl::opt<bool> NoLazyCompilation("disable-lazy-compilation",
+                                       cl::desc("Disable JIT lazy compilation"),
+                                       cl::init(false));
+
+static cl::opt<bool> GenerateSoftFloatCalls(
+    "soft-float", cl::desc("Generate software floating point library calls"),
     cl::init(false));
 
-  cl::opt<bool> NoProcessSymbols(
-      "no-process-syms",
-      cl::desc("Do not resolve lli process symbols in JIT'd code"),
-      cl::init(false));
-
-  enum class LLJITPlatform { Inactive, Auto, ExecutorNative, GenericIR };
-
-  cl::opt<LLJITPlatform> Platform(
-      "lljit-platform", cl::desc("Platform to use with LLJIT"),
-      cl::init(LLJITPlatform::Auto),
-      cl::values(clEnumValN(LLJITPlatform::Auto, "Auto",
-                            "Like 'ExecutorNative' if ORC runtime "
-                            "provided, otherwise like 'GenericIR'"),
-                 clEnumValN(LLJITPlatform::ExecutorNative, "ExecutorNative",
-                            "Use the native platform for the executor."
-                            "Requires -orc-runtime"),
-                 clEnumValN(LLJITPlatform::GenericIR, "GenericIR",
-                            "Use LLJITGenericIRPlatform"),
-                 clEnumValN(LLJITPlatform::Inactive, "Inactive",
-                            "Disable platform support explicitly")),
-      cl::Hidden);
-
-  enum class DumpKind {
-    NoDump,
-    DumpFuncsToStdOut,
-    DumpModsToStdOut,
-    DumpModsToDisk,
-    DumpDebugDescriptor,
-    DumpDebugObjects,
-  };
+static cl::opt<bool> NoProcessSymbols(
+    "no-process-syms",
+    cl::desc("Do not resolve lli process symbols in JIT'd code"),
+    cl::init(false));
 
-  cl::opt<DumpKind> OrcDumpKind(
-      "orc-lazy-debug", cl::desc("Debug dumping for the orc-lazy JIT."),
-      cl::init(DumpKind::NoDump),
-      cl::values(
-          clEnumValN(DumpKind::NoDump, "no-dump", "Don't dump anything."),
-          clEnumValN(DumpKind::DumpFuncsToStdOut, "funcs-to-stdout",
-                     "Dump function names to stdout."),
-          clEnumValN(DumpKind::DumpModsToStdOut, "mods-to-stdout",
-                     "Dump modules to stdout."),
-          clEnumValN(DumpKind::DumpModsToDisk, "mods-to-disk",
-                     "Dump modules to the current "
-                     "working directory. (WARNING: "
-                     "will overwrite existing files)."),
-          clEnumValN(DumpKind::DumpDebugDescriptor, "jit-debug-descriptor",
-                     "Dump __jit_debug_descriptor contents to stdout"),
-          clEnumValN(DumpKind::DumpDebugObjects, "jit-debug-objects",
-                     "Dump __jit_debug_descriptor in-memory debug "
-                     "objects as tool output")),
-      cl::Hidden);
-
-  ExitOnError ExitOnErr;
-}
+enum class LLJITPlatform { Inactive, Auto, ExecutorNative, GenericIR };
+
+static cl::opt<LLJITPlatform> Platform(
+    "lljit-platform", cl::desc("Platform to use with LLJIT"),
+    cl::init(LLJITPlatform::Auto),
+    cl::values(clEnumValN(LLJITPlatform::Auto, "Auto",
+                          "Like 'ExecutorNative' if ORC runtime "
+                          "provided, otherwise like 'GenericIR'"),
+               clEnumValN(LLJITPlatform::ExecutorNative, "ExecutorNative",
+                          "Use the native platform for the executor."
+                          "Requires -orc-runtime"),
+               clEnumValN(LLJITPlatform::GenericIR, "GenericIR",
+                          "Use LLJITGenericIRPlatform"),
+               clEnumValN(LLJITPlatform::Inactive, "Inactive",
+                          "Disable platform support explicitly")),
+    cl::Hidden);
+
+enum class DumpKind {
+  NoDump,
+  DumpFuncsToStdOut,
+  DumpModsToStdOut,
+  DumpModsToDisk,
+  DumpDebugDescriptor,
+  DumpDebugObjects,
+};
 
-LLVM_ATTRIBUTE_USED void linkComponents() {
+static cl::opt<DumpKind> OrcDumpKind(
+    "orc-lazy-debug", cl::desc("Debug dumping for the orc-lazy JIT."),
+    cl::init(DumpKind::NoDump),
+    cl::values(clEnumValN(DumpKind::NoDump, "no-dump", "Don't dump anything."),
+               clEnumValN(DumpKind::DumpFuncsToStdOut, "funcs-to-stdout",
+                          "Dump function names to stdout."),
+               clEnumValN(DumpKind::DumpModsToStdOut, "mods-to-stdout",
+                          "Dump modules to stdout."),
+               clEnumValN(DumpKind::DumpModsToDisk, "mods-to-disk",
+                          "Dump modules to the current "
+                          "working directory. (WARNING: "
+                          "will overwrite existing files)."),
+               clEnumValN(DumpKind::DumpDebugDescriptor, "jit-debug-descriptor",
+                          "Dump __jit_debug_descriptor contents to stdout"),
+               clEnumValN(DumpKind::DumpDebugObjects, "jit-debug-objects",
+                          "Dump __jit_debug_descriptor in-memory debug "
+                          "objects as tool output")),
+    cl::Hidden);
+
+static ExitOnError ExitOnErr;
+
+LLVM_ATTRIBUTE_USED static void linkComponents() {
   errs() << (void *)&llvm_orc_registerEHFrameSectionAllocAction
          << (void *)&llvm_orc_deregisterEHFrameSectionAllocAction
          << (void *)&llvm_orc_registerJITLoaderGDBWrapper
          << (void *)&llvm_orc_registerJITLoaderGDBAllocAction;
 }
 
+namespace {
 //===----------------------------------------------------------------------===//
 // Object cache
 //
@@ -367,6 +363,7 @@ private:
     return true;
   }
 };
+} // namespace
 
 // On Mingw and Cygwin, an external symbol named '__main' is called from the
 // generated 'main' function to allow static initialization.  To avoid linking
@@ -400,7 +397,7 @@ static void addCygMingExtraModule(ExecutionEngine &EE, LLVMContext &Context,
   EE.addModule(std::move(M));
 }
 
-CodeGenOptLevel getOptLevel() {
+static CodeGenOptLevel getOptLevel() {
   if (auto Level = CodeGenOpt::parseLevel(OptLevel))
     return *Level;
   WithColor::error(errs(), "lli") << "invalid optimization level.\n";
@@ -412,10 +409,10 @@ CodeGenOptLevel getOptLevel() {
   exit(1);
 }
 
-Error loadDylibs();
-int runOrcJIT(const char *ProgName);
-void disallowOrcOptions();
-Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote();
+static Error loadDylibs();
+static int runOrcJIT(const char *ProgName);
+static void disallowOrcOptions();
+static Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote();
 
 //===----------------------------------------------------------------------===//
 // main Driver function
@@ -863,7 +860,7 @@ static std::function<void(MemoryBuffer &)> createObjDebugDumper() {
   llvm_unreachable("Unknown DumpKind");
 }
 
-Error loadDylibs() {
+static Error loadDylibs() {
   for (const auto &Dylib : Dylibs) {
     std::string ErrMsg;
     if (sys::DynamicLibrary::LoadLibraryPermanently(Dylib.c_str(), &ErrMsg))
@@ -875,7 +872,7 @@ Error loadDylibs() {
 
 static void exitOnLazyCallThroughFailure() { exit(1); }
 
-Expected<orc::ThreadSafeModule>
+static Expected<orc::ThreadSafeModule>
 loadModule(StringRef Path, orc::ThreadSafeContext TSCtx) {
   SMDiagnostic Err;
   auto M = TSCtx.withContextDo(
@@ -895,7 +892,7 @@ loadModule(StringRef Path, orc::ThreadSafeContext TSCtx) {
   return orc::ThreadSafeModule(std::move(M), std::move(TSCtx));
 }
 
-int mingw_noop_main(void) {
+static int mingw_noop_main(void) {
   // Cygwin and MinGW insert calls from the main function to the runtime
   // function __main. The __main function is responsible for setting up main's
   // environment (e.g. running static constructors), however this is not needed
@@ -912,7 +909,7 @@ int mingw_noop_main(void) {
 // Try to enable debugger support for the given instance.
 // This alway returns success, but prints a warning if it's not able to enable
 // debugger support.
-Error tryEnableDebugSupport(orc::LLJIT &J) {
+static Error tryEnableDebugSupport(orc::LLJIT &J) {
   if (auto Err = enableDebuggerSupport(J)) {
     [[maybe_unused]] std::string ErrMsg = toString(std::move(Err));
     LLVM_DEBUG(dbgs() << "lli: " << ErrMsg << "\n");
@@ -920,7 +917,7 @@ Error tryEnableDebugSupport(orc::LLJIT &J) {
   return Error::success();
 }
 
-int runOrcJIT(const char *ProgName) {
+static int runOrcJIT(const char *ProgName) {
   // Start setting up the JIT environment.
 
   // Parse the main module.
@@ -1187,7 +1184,7 @@ int runOrcJIT(const char *ProgName) {
   return Result;
 }
 
-void disallowOrcOptions() {
+static void disallowOrcOptions() {
   // Make sure nobody used an orc-lazy specific option accidentally.
 
   if (LazyJITCompileThreads != 0) {
@@ -1206,7 +1203,7 @@ void disallowOrcOptions() {
   }
 }
 
-Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote() {
+static Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote() {
 #ifndef LLVM_ON_UNIX
   llvm_unreachable("launchRemote not supported on non-Unix platforms");
 #else
diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
index fa56d0d..fb5c0bf 100644
--- a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
+++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Host.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
@@ -142,6 +143,7 @@ int AssembleOneInput(const uint8_t *Data, size_t Size) {
 
   static const std::vector<std::string> NoIncludeDirs;
   SrcMgr.setIncludeDirs(NoIncludeDirs);
+  SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem());
 
   static std::string ArchName;
   std::string Error;
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index 2a89961..3b2d4f8 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/TargetParser/Host.h"
 #include <memory>
@@ -439,6 +440,7 @@ int main(int argc, char **argv) {
   // Record the location of the include directories so that the lexer can find
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
+  SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem());
 
   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TheTriple));
   assert(MRI && "Unable to create target register info!");
diff --git a/llvm/tools/llvm-ml/llvm-ml.cpp b/llvm/tools/llvm-ml/llvm-ml.cpp
index cda86e7..7b88576 100644
--- a/llvm/tools/llvm-ml/llvm-ml.cpp
+++ b/llvm/tools/llvm-ml/llvm-ml.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/TargetParser/Host.h"
 #include <ctime>
@@ -313,6 +314,7 @@ int llvm_ml_main(int Argc, char **Argv, const llvm::ToolContext &) {
     }
   }
   SrcMgr.setIncludeDirs(IncludeDirs);
+  SrcMgr.setVirtualFileSystem(vfs::getRealFileSystem());
 
   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TheTriple));
   assert(MRI && "Unable to create target register info!");
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 46be539d..3ec644a 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -728,11 +728,17 @@ public:
     } while (!Comments.empty());
     FOS.flush();
   }
+
+  // Hook invoked when starting to disassemble a symbol at the current position.
+  // Default is no-op.
+  virtual void onSymbolStart() {}
 };
 PrettyPrinter PrettyPrinterInst;
 
 class HexagonPrettyPrinter : public PrettyPrinter {
 public:
+  void onSymbolStart() override { reset(); }
+
   void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
                  formatted_raw_ostream &OS) {
     if (LeadingAddr)
@@ -2228,6 +2234,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
         Start += Size;
         break;
       }
+      // Allow targets to reset any per-symbol state.
+      DT->Printer->onSymbolStart();
       formatted_raw_ostream FOS(OS);
       Index = Start;
       if (SectionAddr < StartAddress)
diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp
index 137dd43..e2e778f 100644
--- a/llvm/unittests/ADT/SmallVectorTest.cpp
+++ b/llvm/unittests/ADT/SmallVectorTest.cpp
@@ -127,24 +127,24 @@ public:
     return c0.getValue() == c1.getValue();
   }
 
-  friend bool LLVM_ATTRIBUTE_UNUSED operator!=(const Constructable &c0,
-                                               const Constructable &c1) {
+  [[maybe_unused]] friend bool operator!=(const Constructable &c0,
+                                          const Constructable &c1) {
     return c0.getValue() != c1.getValue();
   }
 
   friend bool operator<(const Constructable &c0, const Constructable &c1) {
     return c0.getValue() < c1.getValue();
   }
-  friend bool LLVM_ATTRIBUTE_UNUSED operator<=(const Constructable &c0,
-                                               const Constructable &c1) {
+  [[maybe_unused]] friend bool operator<=(const Constructable &c0,
+                                          const Constructable &c1) {
     return c0.getValue() <= c1.getValue();
   }
-  friend bool LLVM_ATTRIBUTE_UNUSED operator>(const Constructable &c0,
-                                              const Constructable &c1) {
+  [[maybe_unused]] friend bool operator>(const Constructable &c0,
+                                         const Constructable &c1) {
     return c0.getValue() > c1.getValue();
   }
-  friend bool LLVM_ATTRIBUTE_UNUSED operator>=(const Constructable &c0,
-                                               const Constructable &c1) {
+  [[maybe_unused]] friend bool operator>=(const Constructable &c0,
+                                          const Constructable &c1) {
     return c0.getValue() >= c1.getValue();
   }
 };
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
index 7340f56..04cd66c 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp
@@ -420,12 +420,14 @@ TEST(LegalizerInfoTest, RuleSets) {
 
     // Raw type form
     LI.getActionDefinitionsBuilder(G_ADD)
-      .fewerElementsIf(typeIs(0, v4s32), changeElementCountTo(0, v2s32))
-      .fewerElementsIf(typeIs(0, v8s32), changeElementCountTo(0, s32))
-      .fewerElementsIf(typeIs(0, LLT::scalable_vector(4, 16)),
-                       changeElementCountTo(0, LLT::scalable_vector(2, 16)))
-      .fewerElementsIf(typeIs(0, LLT::scalable_vector(8, 16)),
-                       changeElementCountTo(0, s16));
+        .fewerElementsIf(typeIs(0, v4s32),
+                         changeElementCountTo(0, ElementCount::getFixed(2)))
+        .fewerElementsIf(typeIs(0, v8s32),
+                         changeElementCountTo(0, ElementCount::getFixed(1)))
+        .fewerElementsIf(typeIs(0, LLT::scalable_vector(4, s16)),
+                         changeElementCountTo(0, ElementCount::getScalable(2)))
+        .fewerElementsIf(typeIs(0, LLT::scalable_vector(8, s16)),
+                         changeElementCountTo(0, ElementCount::getFixed(1)));
 
     LegacyInfo.computeTables();
 
diff --git a/llvm/unittests/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManagerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManagerTest.cpp
index d4b45ea..2c6650d 100644
--- a/llvm/unittests/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManagerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManagerTest.cpp
@@ -39,8 +39,11 @@ public:
     return ExecutorAddr::fromPtr(MB.base());
   }
 
-  Error finalize(tpctypes::FinalizeRequest FR) {
+  Expected<ExecutorAddr> initialize(tpctypes::FinalizeRequest FR) {
+    assert(!FR.Segments.empty());
+    ExecutorAddr Base = FR.Segments[0].Addr;
     for (auto &Seg : FR.Segments) {
+      Base = std::min(Base, Seg.Addr);
       char *Mem = Seg.Addr.toPtr<char *>();
       memcpy(Mem, Seg.Content.data(), Seg.Content.size());
       memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
@@ -52,10 +55,10 @@ public:
       if ((Seg.RAG.Prot & MemProt::Exec) != MemProt::Exec)
         sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
     }
-    return Error::success();
+    return Base;
   }
 
-  Error deallocate(std::vector<ExecutorAddr> &Bases) {
+  Error release(std::vector<ExecutorAddr> &Bases) {
     Error Err = Error::success();
     for (auto &Base : Bases) {
       auto I = Blocks.find(Base.toPtr<void *>());
@@ -86,18 +89,18 @@ CWrapperFunctionResult testReserve(const char *ArgData, size_t ArgSize) {
           .release();
 }
 
-CWrapperFunctionResult testFinalize(const char *ArgData, size_t ArgSize) {
-  return WrapperFunction<rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+CWrapperFunctionResult testInitialize(const char *ArgData, size_t ArgSize) {
+  return WrapperFunction<
+             rt::SPSSimpleExecutorMemoryManagerInitializeSignature>::
       handle(ArgData, ArgSize,
-             makeMethodWrapperHandler(&SimpleAllocator::finalize))
+             makeMethodWrapperHandler(&SimpleAllocator::initialize))
           .release();
 }
 
-CWrapperFunctionResult testDeallocate(const char *ArgData, size_t ArgSize) {
-  return WrapperFunction<
-             rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+CWrapperFunctionResult testRelease(const char *ArgData, size_t ArgSize) {
+  return WrapperFunction<rt::SPSSimpleExecutorMemoryManagerReleaseSignature>::
       handle(ArgData, ArgSize,
-             makeMethodWrapperHandler(&SimpleAllocator::deallocate))
+             makeMethodWrapperHandler(&SimpleAllocator::release))
           .release();
 }
 
@@ -108,8 +111,8 @@ TEST(EPCGenericJITLinkMemoryManagerTest, AllocFinalizeFree) {
   EPCGenericJITLinkMemoryManager::SymbolAddrs SAs;
   SAs.Allocator = ExecutorAddr::fromPtr(&SA);
   SAs.Reserve = ExecutorAddr::fromPtr(&testReserve);
-  SAs.Finalize = ExecutorAddr::fromPtr(&testFinalize);
-  SAs.Deallocate = ExecutorAddr::fromPtr(&testDeallocate);
+  SAs.Initialize = ExecutorAddr::fromPtr(&testInitialize);
+  SAs.Release = ExecutorAddr::fromPtr(&testRelease);
 
   auto MemMgr = std::make_unique<EPCGenericJITLinkMemoryManager>(*SelfEPC, SAs);
   StringRef Hello = "hello";
diff --git a/llvm/unittests/ExecutionEngine/Orc/SimpleExecutorMemoryManagerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/SimpleExecutorMemoryManagerTest.cpp
index 6e9b0b2..9c6f19c 100644
--- a/llvm/unittests/ExecutionEngine/Orc/SimpleExecutorMemoryManagerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/SimpleExecutorMemoryManagerTest.cpp
@@ -34,12 +34,12 @@ TEST(SimpleExecutorMemoryManagerTest, AllocFinalizeFree) {
   SimpleExecutorMemoryManager MemMgr;
 
   constexpr unsigned AllocSize = 16384;
-  auto Mem = MemMgr.allocate(AllocSize);
+  auto Mem = MemMgr.reserve(AllocSize);
   EXPECT_THAT_ERROR(Mem.takeError(), Succeeded());
 
   std::string HW = "Hello, world!";
 
-  int FinalizeCounter = 0;
+  int InitializeCounter = 0;
   int DeallocateCounter = 0;
 
   tpctypes::FinalizeRequest FR;
@@ -52,27 +52,27 @@ TEST(SimpleExecutorMemoryManagerTest, AllocFinalizeFree) {
       {/* Finalize: */
        cantFail(WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddr>>(
            ExecutorAddr::fromPtr(incrementWrapper),
-           ExecutorAddr::fromPtr(&FinalizeCounter))),
+           ExecutorAddr::fromPtr(&InitializeCounter))),
        /*  Deallocate: */
        cantFail(WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddr>>(
            ExecutorAddr::fromPtr(incrementWrapper),
            ExecutorAddr::fromPtr(&DeallocateCounter)))});
 
-  EXPECT_EQ(FinalizeCounter, 0);
+  EXPECT_EQ(InitializeCounter, 0);
   EXPECT_EQ(DeallocateCounter, 0);
 
-  auto FinalizeErr = MemMgr.finalize(FR);
-  EXPECT_THAT_ERROR(std::move(FinalizeErr), Succeeded());
+  auto InitializeErr = MemMgr.initialize(FR);
+  EXPECT_THAT_EXPECTED(std::move(InitializeErr), Succeeded());
 
-  EXPECT_EQ(FinalizeCounter, 1);
+  EXPECT_EQ(InitializeCounter, 1);
   EXPECT_EQ(DeallocateCounter, 0);
 
   EXPECT_EQ(HW, std::string(Mem->toPtr<const char *>()));
 
-  auto DeallocateErr = MemMgr.deallocate({*Mem});
-  EXPECT_THAT_ERROR(std::move(DeallocateErr), Succeeded());
+  auto ReleaseErr = MemMgr.release({*Mem});
+  EXPECT_THAT_ERROR(std::move(ReleaseErr), Succeeded());
 
-  EXPECT_EQ(FinalizeCounter, 1);
+  EXPECT_EQ(InitializeCounter, 1);
   EXPECT_EQ(DeallocateCounter, 1);
 }
 
diff --git a/llvm/unittests/IR/RuntimeLibcallsTest.cpp b/llvm/unittests/IR/RuntimeLibcallsTest.cpp
index 26cb7e3..8925d2b 100644
--- a/llvm/unittests/IR/RuntimeLibcallsTest.cpp
+++ b/llvm/unittests/IR/RuntimeLibcallsTest.cpp
@@ -44,9 +44,9 @@ TEST(RuntimeLibcallsTest, LibcallImplByName) {
         RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("sqrtl");
     ASSERT_EQ(size(SquirtleSquad), 3);
     auto I = SquirtleSquad.begin();
-    EXPECT_EQ(*I++, RTLIB::impl_sqrt_f128);
-    EXPECT_EQ(*I++, RTLIB::impl_sqrt_f80);
-    EXPECT_EQ(*I++, RTLIB::impl_sqrt_ppcf128);
+    EXPECT_EQ(*I++, RTLIB::impl_sqrtl_f128);
+    EXPECT_EQ(*I++, RTLIB::impl_sqrtl_f80);
+    EXPECT_EQ(*I++, RTLIB::impl_sqrtl_ppcf128);
   }
 
   // Last libcall
@@ -54,9 +54,9 @@ TEST(RuntimeLibcallsTest, LibcallImplByName) {
     auto Truncs = RTLIB::RuntimeLibcallsInfo::lookupLibcallImplName("truncl");
     ASSERT_EQ(size(Truncs), 3);
     auto I = Truncs.begin();
-    EXPECT_EQ(*I++, RTLIB::impl_trunc_f128);
-    EXPECT_EQ(*I++, RTLIB::impl_trunc_f80);
-    EXPECT_EQ(*I++, RTLIB::impl_trunc_ppcf128);
+    EXPECT_EQ(*I++, RTLIB::impl_truncl_f128);
+    EXPECT_EQ(*I++, RTLIB::impl_truncl_f80);
+    EXPECT_EQ(*I++, RTLIB::impl_truncl_ppcf128);
   }
 }
 
diff --git a/llvm/unittests/Support/SourceMgrTest.cpp b/llvm/unittests/Support/SourceMgrTest.cpp
index 301b64f..c65f001 100644
--- a/llvm/unittests/Support/SourceMgrTest.cpp
+++ b/llvm/unittests/Support/SourceMgrTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
@@ -506,3 +507,13 @@ TEST_F(SourceMgrTest, PrintWithoutLoc) {
   Diag.print(nullptr, OS, false, false, false);
   EXPECT_EQ("message\n", Output);
 }
+
+TEST_F(SourceMgrTest, IncludeDirs) {
+  auto VFS = makeIntrusiveRefCnt<vfs::InMemoryFileSystem>();
+  VFS->addFile("include/file", 0, MemoryBuffer::getMemBuffer("contents"));
+  SM.setVirtualFileSystem(std::move(VFS));
+  SM.setIncludeDirs({"include"});
+  std::string ResolvedPath;
+  unsigned NumBuffers = SM.AddIncludeFile("file", SMLoc(), ResolvedPath);
+  EXPECT_EQ(NumBuffers, 1u);
+}
diff --git a/llvm/utils/TableGen/Basic/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/Basic/RISCVTargetDefEmitter.cpp
index df14c77..f795937 100644
--- a/llvm/utils/TableGen/Basic/RISCVTargetDefEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RISCVTargetDefEmitter.cpp
@@ -68,13 +68,14 @@ static void emitRISCVExtensions(const RecordKeeper &Records, raw_ostream &OS) {
   if (!Extensions.empty()) {
     OS << "\nstatic constexpr ImpliedExtsEntry ImpliedExts[] = {\n";
     for (const Record *Ext : Extensions) {
-      auto ImpliesList = Ext->getValueAsListOfDefs("Implies");
+      std::vector<const Record *> ImpliesList =
+          Ext->getValueAsListOfDefs("Implies");
       if (ImpliesList.empty())
         continue;
 
       StringRef Name = getExtensionName(Ext);
 
-      for (auto *ImpliedExt : ImpliesList) {
+      for (const Record *ImpliedExt : ImpliesList) {
         if (!ImpliedExt->isSubClassOf("RISCVExtension"))
           continue;
 
@@ -150,11 +151,12 @@ static void emitRISCVProfiles(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#ifdef GET_SUPPORTED_PROFILES\n";
   OS << "#undef GET_SUPPORTED_PROFILES\n\n";
 
-  auto Profiles = Records.getAllDerivedDefinitionsIfDefined("RISCVProfile");
+  ArrayRef<const Record *> Profiles =
+      Records.getAllDerivedDefinitionsIfDefined("RISCVProfile");
 
   if (!Profiles.empty()) {
     printProfileTable(OS, Profiles, /*Experimental=*/false);
-    bool HasExperimentalProfiles = any_of(Profiles, [&](auto &Rec) {
+    bool HasExperimentalProfiles = any_of(Profiles, [&](const Record *Rec) {
       return Rec->getValueAsBit("Experimental");
     });
     if (HasExperimentalProfiles)
@@ -173,15 +175,17 @@ static void emitRISCVProcs(const RecordKeeper &RK, raw_ostream &OS) {
   // Iterate on all definition records.
   for (const Record *Rec :
        RK.getAllDerivedDefinitionsIfDefined("RISCVProcessorModel")) {
-    const std::vector<const Record *> &Features =
+    std::vector<const Record *> Features =
         Rec->getValueAsListOfDefs("Features");
-    bool FastScalarUnalignedAccess = any_of(Features, [&](auto &Feature) {
-      return Feature->getValueAsString("Name") == "unaligned-scalar-mem";
-    });
-
-    bool FastVectorUnalignedAccess = any_of(Features, [&](auto &Feature) {
-      return Feature->getValueAsString("Name") == "unaligned-vector-mem";
-    });
+    bool FastScalarUnalignedAccess =
+        any_of(Features, [&](const Record *Feature) {
+          return Feature->getValueAsString("Name") == "unaligned-scalar-mem";
+        });
+
+    bool FastVectorUnalignedAccess =
+        any_of(Features, [&](const Record *Feature) {
+          return Feature->getValueAsString("Name") == "unaligned-vector-mem";
+        });
 
     OS << "PROC(" << Rec->getName() << ", {\"" << Rec->getValueAsString("Name")
        << "\"}, {\"";
diff --git a/llvm/utils/TableGen/Common/Types.cpp b/llvm/utils/TableGen/Common/Types.cpp
index 35b79b3..8e8d6f6 100644
--- a/llvm/utils/TableGen/Common/Types.cpp
+++ b/llvm/utils/TableGen/Common/Types.cpp
@@ -8,16 +8,12 @@
 
 #include "Types.h"
 
-// For LLVM_ATTRIBUTE_UNUSED
-#include "llvm/Support/Compiler.h"
-
 #include <cassert>
 
 using namespace llvm;
 
-const char *
-llvm::getMinimalTypeForRange(uint64_t Range,
-                             unsigned MaxSize LLVM_ATTRIBUTE_UNUSED) {
+const char *llvm::getMinimalTypeForRange(uint64_t Range,
+                                         [[maybe_unused]] unsigned MaxSize) {
   // TODO: The original callers only used 32 and 64 so these are the only
   //       values permitted. Rather than widen the supported values we should
   //       allow 64 for the callers that currently use 32 and remove the
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index dba8bde..e0be104 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -555,7 +555,7 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) {
     raw_string_ostream SuffixOS(ManglingSuffix);
     Operands.PrintManglingSuffix(SuffixOS, ImmediatePredicates, true);
     if (!StringSwitch<bool>(ManglingSuffix)
-             .Cases("", "r", "rr", "ri", "i", "f", true)
+             .Cases({"", "r", "rr", "ri", "i", "f"}, true)
              .Default(false))
       continue;
 
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
index e9fd132..406a728 100755
--- a/llvm/utils/git/code-format-helper.py
+++ b/llvm/utils/git/code-format-helper.py
@@ -391,7 +391,7 @@ You can test this locally with the following command:
             return None
 
         # Use git to find files that have had a change in the number of undefs
-        regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)"
+        regex = "([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)"
         cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex]
 
         if args.start_rev and args.end_rev:
diff --git a/llvm/utils/gn/secondary/clang/unittests/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Basic/BUILD.gn
index 1449dc7..954de88 100644
--- a/llvm/utils/gn/secondary/clang/unittests/Basic/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/Basic/BUILD.gn
@@ -14,6 +14,7 @@ unittest("BasicTests") {
     "DiagnosticTest.cpp",
     "FileEntryTest.cpp",
     "FileManagerTest.cpp",
+    "LangOptionsTest.cpp",
     "LineOffsetMappingTest.cpp",
     "OffloadArchTest.cpp",
     "SanitizersTest.cpp",
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 9753dca..b67e4cb 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -973,6 +973,7 @@ def LLVM_ShuffleVectorOp : LLVM_Op<"shufflevector",
     custom<ShuffleType>(ref(type($v1)), type($res), ref($mask))
   }];
 
+  let hasFolder = 1;
   let hasVerifier = 1;
 
   string llvmInstName = "ShuffleVector";
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index ce9ff7e..d959464 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -263,6 +263,7 @@ class NVVM_PureSpecialRangeableRegisterOp<string mnemonic, list<Trait> traits =
   let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";
   let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda;
   let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda;
+  let hasVerifier = 1;
 
   // Backwards-compatibility builder for an unspecified range.
   let builders = [
@@ -279,6 +280,11 @@ class NVVM_PureSpecialRangeableRegisterOp<string mnemonic, list<Trait> traits =
         SetIntRangeFn setResultRanges) {
         nvvmInferResultRanges(getOperation(), getResult(), argRanges, setResultRanges);
     }
+
+    // Verify the range attribute satisfies LLVM ConstantRange constructor requirements.
+    ::llvm::LogicalResult $cppClass::verify() {
+      return verifyConstantRangeAttr(getOperation(), getRange());
+    }
   }];
 
 }
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 6925cec..68f31e6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -412,6 +412,32 @@ def ROCDL_WaitExpcntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.expcnt", [], 0, [0],
   let assemblyFormat = "$count attr-dict";
 }
 
+def ROCDL_WaitAsynccntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.asynccnt", [], 0, [0], ["count"]>,
+  Arguments<(ins I16Attr:$count)> {
+  let summary = "Wait until ASYNCCNT is less than or equal to `count`";
+  let description = [{
+      Wait for the counter specified to be less-than or equal-to the `count`
+      before continuing.
+
+      Available on gfx1250+.
+  }];
+  let results = (outs);
+  let assemblyFormat = "$count attr-dict";
+}
+
+def ROCDL_WaitTensorcntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.tensorcnt", [], 0, [0], ["count"]>,
+  Arguments<(ins I16Attr:$count)> {
+  let summary = "Wait until TENSORCNT is less than or equal to `count`";
+  let description = [{
+      Wait for the counter specified to be less-than or equal-to the `count`
+      before continuing.
+
+      Available on gfx1250+.
+  }];
+  let results = (outs);
+  let assemblyFormat = "$count attr-dict";
+}
+
 def ROCDL_SetPrioOp : ROCDL_ConcreteNonMemIntrOp<"s.setprio", [], 0, [0], ["priority"]>,
   Arguments<(ins I16Attr:$priority)> {
   let assemblyFormat = "$priority attr-dict";
diff --git a/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td b/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td
index 29b384f..b9d7163 100644
--- a/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td
+++ b/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td
@@ -174,7 +174,7 @@ def Shard_NeighborsLinearIndicesOp : Shard_Op<"neighbors_linear_indices", [
     ```
     The above returns two indices, `633` and `693`, which correspond to the
     index of the previous process `(1, 1, 3)`, and the next process 
-    `(1, 3, 3) along the split axis `1`.
+    `(1, 3, 3)` along the split axis `1`.
 
     A negative value is returned if there is no neighbor in the respective
     direction along the given `split_axes`.
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 6e79085..6e15b1e 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -2999,6 +2999,7 @@ def Vector_StepOp : Vector_Op<"step", [
   }];
   let results = (outs VectorOfRankAndType<[1], [Index]>:$result);
   let assemblyFormat = "attr-dict `:` type($result)";
+  let hasCanonicalizer = 1;
 }
 
 def Vector_YieldOp : Vector_Op<"yield", [
diff --git a/mlir/include/mlir/TableGen/CodeGenHelpers.h b/mlir/include/mlir/TableGen/CodeGenHelpers.h
index 252da21..997aef2 100644
--- a/mlir/include/mlir/TableGen/CodeGenHelpers.h
+++ b/mlir/include/mlir/TableGen/CodeGenHelpers.h
@@ -88,7 +88,7 @@ public:
   ///
   /// Constraints that do not meet the restriction that they can only reference
   /// `$_self` and `$_op` are not uniqued.
-  void emitOpConstraints(ArrayRef<const llvm::Record *> opDefs);
+  void emitOpConstraints();
 
   /// Unique all compatible type and attribute constraints from a pattern file
   /// and emit them at the top of the generated file.
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 5355909..41d8d53 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -1723,17 +1723,18 @@ struct VectorBroadcastScalarToLowRankLowering
       return success();
     }
 
-    // For 1-d vector, we additionally do a `vectorshuffle`.
     auto v =
         LLVM::InsertElementOp::create(rewriter, broadcast.getLoc(), vectorType,
                                       poison, adaptor.getSource(), zero);
 
+    // For 1-d vector, we additionally do a `shufflevector`.
     int64_t width = cast<VectorType>(broadcast.getType()).getDimSize(0);
     SmallVector<int32_t> zeroValues(width, 0);
 
     // Shuffle the value across the desired number of elements.
-    rewriter.replaceOpWithNewOp<LLVM::ShuffleVectorOp>(broadcast, v, poison,
-                                                       zeroValues);
+    auto shuffle = rewriter.createOrFold<LLVM::ShuffleVectorOp>(
+        broadcast.getLoc(), v, poison, zeroValues);
+    rewriter.replaceOp(broadcast, shuffle);
     return success();
   }
 };
diff --git a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
index 71687b1..ddcbc44 100644
--- a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
+++ b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
@@ -20,6 +20,7 @@
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/SCF/Transforms/Patterns.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
@@ -390,7 +391,8 @@ class LoadStoreToXeVMPattern : public OpConversionPattern<OpType> {
     // Load result or Store valye Type can be vector or scalar.
     Type valOrResTy;
     if constexpr (std::is_same_v<OpType, xegpu::LoadGatherOp>)
-      valOrResTy = op.getResult().getType();
+      valOrResTy =
+          this->getTypeConverter()->convertType(op.getResult().getType());
     else
       valOrResTy = adaptor.getValue().getType();
     VectorType valOrResVecTy = dyn_cast<VectorType>(valOrResTy);
@@ -878,10 +880,30 @@ struct ConvertXeGPUToXeVMPass
       }
       return {};
     };
-    typeConverter.addSourceMaterialization(memrefMaterializationCast);
-    typeConverter.addSourceMaterialization(ui64MaterializationCast);
-    typeConverter.addSourceMaterialization(ui32MaterializationCast);
-    typeConverter.addSourceMaterialization(vectorMaterializationCast);
+
+    // If result type of original op is single element vector and lowered type
+    // is scalar. This materialization cast creates a single element vector by
+    // broadcasting the scalar value.
+    auto singleElementVectorMaterializationCast =
+        [](OpBuilder &builder, Type type, ValueRange inputs,
+           Location loc) -> Value {
+      if (inputs.size() != 1)
+        return {};
+      auto input = inputs.front();
+      if (input.getType().isIntOrIndexOrFloat()) {
+        // If the input is a scalar, and the target type is a vector of single
+        // element, create a single element vector by broadcasting.
+        if (auto vecTy = dyn_cast<VectorType>(type)) {
+          if (vecTy.getNumElements() == 1) {
+            return vector::BroadcastOp::create(builder, loc, vecTy, input)
+                .getResult();
+          }
+        }
+      }
+      return {};
+    };
+    typeConverter.addSourceMaterialization(
+        singleElementVectorMaterializationCast);
     typeConverter.addTargetMaterialization(memrefMaterializationCast);
     typeConverter.addTargetMaterialization(ui32MaterializationCast);
     typeConverter.addTargetMaterialization(ui64MaterializationCast);
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 7ca09d9..3eae67f 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -2826,6 +2826,20 @@ LogicalResult ShuffleVectorOp::verify() {
   return success();
 }
 
+// Folding for shufflevector op when v1 is single element 1D vector
+// and the mask is a single zero. OpFoldResult will be v1 in this case.
+OpFoldResult ShuffleVectorOp::fold(FoldAdaptor adaptor) {
+  // Check if operand 0 is a single element vector.
+  auto vecType = llvm::dyn_cast<VectorType>(getV1().getType());
+  if (!vecType || vecType.getRank() != 1 || vecType.getNumElements() != 1)
+    return {};
+  // Check if the mask is a single zero.
+  // Note: The mask is guaranteed to be non-empty.
+  if (getMask().size() != 1 || getMask()[0] != 0)
+    return {};
+  return getV1();
+}
+
 //===----------------------------------------------------------------------===//
 // Implementations for LLVM::LLVMFuncOp.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index ab54183..2a8c330 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -798,6 +798,26 @@ LogicalResult MmaOp::verify() {
                          " attribute");
   }
 
+  // Validate layout combinations. According to the operation description, most
+  // MMA operations require layoutA=row and layoutB=col. Only m8n8k4 with f16
+  // can use other layout combinations.
+  bool isM8N8K4_F16 =
+      (mmaShape[0] == 8 && mmaShape[1] == 8 && mmaShape[2] == 4 &&
+       getMultiplicandAPtxType() == MMATypes::f16);
+
+  if (!isM8N8K4_F16) {
+    // For all other shapes/types, layoutA must be row and layoutB must be col
+    if (getLayoutA() != MMALayout::row || getLayoutB() != MMALayout::col) {
+      return emitOpError("requires layoutA = #nvvm.mma_layout<row> and "
+                         "layoutB = #nvvm.mma_layout<col> for shape <")
+             << mmaShape[0] << ", " << mmaShape[1] << ", " << mmaShape[2]
+             << "> with element types "
+             << stringifyEnum(*getMultiplicandAPtxType()) << " and "
+             << stringifyEnum(*getMultiplicandBPtxType())
+             << ". Only m8n8k4 with f16 supports other layouts.";
+    }
+  }
+
   return success();
 }
 
@@ -2334,6 +2354,32 @@ static void nvvmInferResultRanges(Operation *op, Value result,
   }
 }
 
+/// Verify the range attribute satisfies LLVM ConstantRange constructor
+/// requirements for NVVM SpecialRangeableRegisterOp.
+static LogicalResult
+verifyConstantRangeAttr(Operation *op,
+                        std::optional<LLVM::ConstantRangeAttr> rangeAttr) {
+  if (!rangeAttr)
+    return success();
+
+  const llvm::APInt &lower = rangeAttr->getLower();
+  const llvm::APInt &upper = rangeAttr->getUpper();
+
+  // Check LLVM ConstantRange constructor condition
+  if (lower == upper && !lower.isMaxValue() && !lower.isMinValue()) {
+    unsigned bitWidth = lower.getBitWidth();
+    llvm::APInt minVal = llvm::APInt::getMinValue(bitWidth);
+    llvm::APInt maxVal = llvm::APInt::getMaxValue(bitWidth);
+    return op->emitOpError(
+               "invalid range attribute: Lower == Upper, but they aren't min (")
+           << llvm::toString(minVal, 10, false) << ") or max ("
+           << llvm::toString(maxVal, 10, false)
+           << ") value! This is an invalid constant range.";
+  }
+
+  return success();
+}
+
 static llvm::Value *getAsPackedI32(llvm::Value *arg,
                                    llvm::IRBuilderBase &builder) {
   return builder.CreateBitCast(arg,
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 58256b0..45c54c7 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -7601,6 +7601,111 @@ void StepOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges,
   setResultRanges(getResult(), result);
 }
 
+namespace {
+
+/// Fold `vector.step -> arith.cmpi` when the step value is compared to a
+/// constant large enough such that the result is the same at all indices.
+///
+/// For example, rewrite the 'greater than' comparison below,
+///
+/// ```mlir
+/// %cst = arith.constant dense<7> : vector<3xindex>
+/// %stp = vector.step : vector<3xindex>
+/// %out = arith.cmpi ugt, %stp, %cst : vector<3xindex>
+/// ```
+///
+/// as,
+///
+/// ```mlir
+/// %out = arith.constant dense<false> : vector<3xi1>.
+/// ```
+///
+/// Above `[0, 1, 2] > [7, 7, 7]` => `[false, false, false]`. Because the result
+/// is false at ALL indices we fold. If the constant was 1, then
+/// `[0, 1, 2] > [1, 1, 1]` => `[false, false, true]` and we do fold,
+/// conservatively preferring the 'compact' vector.step representation.
+///
+/// Note: this folder only works for the case where the constant (`%cst` above)
+/// is the second operand of the comparison. The arith.cmpi canonicalizer will
+/// ensure that constants are always second (on the right).
+struct StepCompareFolder : public OpRewritePattern<StepOp> {
+  using Base::Base;
+
+  LogicalResult matchAndRewrite(StepOp stepOp,
+                                PatternRewriter &rewriter) const override {
+    const int64_t stepSize = stepOp.getResult().getType().getNumElements();
+
+    for (OpOperand &use : stepOp.getResult().getUses()) {
+      auto cmpiOp = dyn_cast<arith::CmpIOp>(use.getOwner());
+      if (!cmpiOp)
+        continue;
+
+      // arith.cmpi canonicalizer makes constants final operands.
+      const unsigned stepOperandNumber = use.getOperandNumber();
+      if (stepOperandNumber != 0)
+        continue;
+
+      // Check that operand 1 is a constant.
+      unsigned constOperandNumber = 1;
+      Value otherOperand = cmpiOp.getOperand(constOperandNumber);
+      std::optional<int64_t> maybeConstValue =
+          getConstantIntValue(otherOperand);
+      if (!maybeConstValue.has_value())
+        continue;
+
+      int64_t constValue = maybeConstValue.value();
+      arith::CmpIPredicate pred = cmpiOp.getPredicate();
+
+      auto maybeSplat = [&]() -> std::optional<bool> {
+        // Handle ult (unsigned less than) and uge (unsigned greater equal).
+        if ((pred == arith::CmpIPredicate::ult ||
+             pred == arith::CmpIPredicate::uge) &&
+            stepSize <= constValue)
+          return pred == arith::CmpIPredicate::ult;
+
+        // Handle ule and ugt.
+        if ((pred == arith::CmpIPredicate::ule ||
+             pred == arith::CmpIPredicate::ugt) &&
+            stepSize - 1 <= constValue) {
+          return pred == arith::CmpIPredicate::ule;
+        }
+
+        // Handle eq and ne.
+        if ((pred == arith::CmpIPredicate::eq ||
+             pred == arith::CmpIPredicate::ne) &&
+            stepSize <= constValue)
+          return pred == arith::CmpIPredicate::ne;
+
+        return std::nullopt;
+      }();
+
+      if (!maybeSplat.has_value())
+        continue;
+
+      rewriter.setInsertionPointAfter(cmpiOp);
+
+      auto type = dyn_cast<VectorType>(cmpiOp.getResult().getType());
+      if (!type)
+        continue;
+
+      auto boolAttr = DenseElementsAttr::get(type, maybeSplat.value());
+      Value splat = mlir::arith::ConstantOp::create(rewriter, cmpiOp.getLoc(),
+                                                    type, boolAttr);
+
+      rewriter.replaceOp(cmpiOp, splat);
+      return success();
+    }
+
+    return failure();
+  }
+};
+} // namespace
+
+void StepOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                         MLIRContext *context) {
+  results.add<StepCompareFolder>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // Vector Masking Utilities
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
index e95338f..12e6475 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
@@ -928,17 +928,20 @@ struct WarpOpDeadResult : public WarpDistributionPattern {
     // Some values may be yielded multiple times and correspond to multiple
     // results. Deduplicating occurs by taking each result with its matching
     // yielded value, and:
-    //   1. recording the unique first position at which the value is yielded.
+    //   1. recording the unique first position at which the value with uses is
+    //   yielded.
     //   2. recording for the result, the first position at which the dedup'ed
     //      value is yielded.
     //   3. skipping from the new result types / new yielded values any result
     //      that has no use or whose yielded value has already been seen.
     for (OpResult result : warpOp.getResults()) {
+      if (result.use_empty())
+        continue;
       Value yieldOperand = yield.getOperand(result.getResultNumber());
       auto it = dedupYieldOperandPositionMap.insert(
           std::make_pair(yieldOperand, newResultTypes.size()));
       dedupResultPositionMap.insert(std::make_pair(result, it.first->second));
-      if (result.use_empty() || !it.second)
+      if (!it.second)
         continue;
       newResultTypes.push_back(result.getType());
       newYieldValues.push_back(yieldOperand);
@@ -1843,16 +1846,16 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
     newWarpOpDistTypes.append(escapingValueDistTypesElse.begin(),
                               escapingValueDistTypesElse.end());
 
-    llvm::SmallDenseMap<unsigned, unsigned> origToNewYieldIdx;
     for (auto [idx, val] :
          llvm::zip_equal(nonIfYieldIndices, nonIfYieldValues)) {
-      origToNewYieldIdx[idx] = newWarpOpYieldValues.size();
       newWarpOpYieldValues.push_back(val);
       newWarpOpDistTypes.push_back(warpOp.getResult(idx).getType());
     }
-    // Create the new `WarpOp` with the updated yield values and types.
-    WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
-        rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes);
+    // Replace the old `WarpOp` with the new one that has additional yield
+    // values and types.
+    SmallVector<size_t> newIndices;
+    WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+        rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes, newIndices);
     // `ifOp` returns the result of the inner warp op.
     SmallVector<Type> newIfOpDistResTypes;
     for (auto [i, res] : llvm::enumerate(ifOp.getResults())) {
@@ -1870,8 +1873,8 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
     OpBuilder::InsertionGuard g(rewriter);
     rewriter.setInsertionPointAfter(newWarpOp);
     auto newIfOp = scf::IfOp::create(
-        rewriter, ifOp.getLoc(), newIfOpDistResTypes, newWarpOp.getResult(0),
-        static_cast<bool>(ifOp.thenBlock()),
+        rewriter, ifOp.getLoc(), newIfOpDistResTypes,
+        newWarpOp.getResult(newIndices[0]), static_cast<bool>(ifOp.thenBlock()),
         static_cast<bool>(ifOp.elseBlock()));
     auto encloseRegionInWarpOp =
         [&](Block *oldIfBranch, Block *newIfBranch,
@@ -1888,7 +1891,7 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
           for (size_t i = 0; i < escapingValues.size();
                ++i, ++warpResRangeStart) {
             innerWarpInputVals.push_back(
-                newWarpOp.getResult(warpResRangeStart));
+                newWarpOp.getResult(newIndices[warpResRangeStart]));
             escapeValToBlockArgIndex[escapingValues[i]] =
                 innerWarpInputTypes.size();
             innerWarpInputTypes.push_back(escapingValueInputTypes[i]);
@@ -1936,17 +1939,8 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
     // Update the users of `<- WarpOp.yield <- IfOp.yield` to use the new `IfOp`
     // result.
     for (auto [origIdx, newIdx] : ifResultMapping)
-      rewriter.replaceAllUsesExcept(warpOp.getResult(origIdx),
+      rewriter.replaceAllUsesExcept(newWarpOp.getResult(origIdx),
                                     newIfOp.getResult(newIdx), newIfOp);
-    // Similarly, update any users of the `WarpOp` results that were not
-    // results of the `IfOp`.
-    for (auto [origIdx, newIdx] : origToNewYieldIdx)
-      rewriter.replaceAllUsesWith(warpOp.getResult(origIdx),
-                                  newWarpOp.getResult(newIdx));
-    // Remove the original `WarpOp` and `IfOp`, they should not have any uses
-    // at this point.
-    rewriter.eraseOp(ifOp);
-    rewriter.eraseOp(warpOp);
     return success();
   }
 
@@ -2065,19 +2059,16 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
                               escapingValueDistTypes.begin(),
                               escapingValueDistTypes.end());
     // Next, we insert all non-`ForOp` yielded values and their distributed
-    // types. We also create a mapping between the non-`ForOp` yielded value
-    // index and the corresponding new `WarpOp` yield value index (needed to
-    // update users later).
-    llvm::SmallDenseMap<unsigned, unsigned> nonForResultMapping;
+    // types.
     for (auto [i, v] :
          llvm::zip_equal(nonForResultIndices, nonForYieldedValues)) {
-      nonForResultMapping[i] = newWarpOpYieldValues.size();
       newWarpOpYieldValues.push_back(v);
       newWarpOpDistTypes.push_back(warpOp.getResult(i).getType());
     }
     // Create the new `WarpOp` with the updated yield values and types.
-    WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
-        rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes);
+    SmallVector<size_t> newIndices;
+    WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+        rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes, newIndices);
 
     // Next, we create a new `ForOp` with the init args yielded by the new
     // `WarpOp`.
@@ -2086,7 +2077,7 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
                                     // escaping values in the new `WarpOp`.
     SmallVector<Value> newForOpOperands;
     for (size_t i = 0; i < escapingValuesStartIdx; ++i)
-      newForOpOperands.push_back(newWarpOp.getResult(i));
+      newForOpOperands.push_back(newWarpOp.getResult(newIndices[i]));
 
     // Create a new `ForOp` outside the new `WarpOp` region.
     OpBuilder::InsertionGuard g(rewriter);
@@ -2110,7 +2101,7 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
     llvm::SmallDenseMap<Value, int64_t> argIndexMapping;
     for (size_t i = escapingValuesStartIdx;
          i < escapingValuesStartIdx + escapingValues.size(); ++i) {
-      innerWarpInput.push_back(newWarpOp.getResult(i));
+      innerWarpInput.push_back(newWarpOp.getResult(newIndices[i]));
       argIndexMapping[escapingValues[i - escapingValuesStartIdx]] =
           innerWarpInputType.size();
       innerWarpInputType.push_back(
@@ -2146,20 +2137,11 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
     if (!innerWarp.getResults().empty())
       scf::YieldOp::create(rewriter, forOp.getLoc(), innerWarp.getResults());
 
-    // Update the users of original `WarpOp` results that were coming from the
+    // Update the users of the new `WarpOp` results that were coming from the
     // original `ForOp` to the corresponding new `ForOp` result.
     for (auto [origIdx, newIdx] : forResultMapping)
-      rewriter.replaceAllUsesExcept(warpOp.getResult(origIdx),
+      rewriter.replaceAllUsesExcept(newWarpOp.getResult(origIdx),
                                     newForOp.getResult(newIdx), newForOp);
-    // Similarly, update any users of the `WarpOp` results that were not
-    // results of the `ForOp`.
-    for (auto [origIdx, newIdx] : nonForResultMapping)
-      rewriter.replaceAllUsesWith(warpOp.getResult(origIdx),
-                                  newWarpOp.getResult(newIdx));
-    // Remove the original `WarpOp` and `ForOp`, they should not have any uses
-    // at this point.
-    rewriter.eraseOp(forOp);
-    rewriter.eraseOp(warpOp);
     // Update any users of escaping values that were forwarded to the
     // inner `WarpOp`. These values are now arguments of the inner `WarpOp`.
     newForOp.walk([&](Operation *op) {
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
index 14639c5..fbae098 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
@@ -465,26 +465,33 @@ struct UnrollElementwisePattern : public RewritePattern {
     auto targetShape = getTargetShape(options, op);
     if (!targetShape)
       return failure();
+    int64_t targetShapeRank = targetShape->size();
     auto dstVecType = cast<VectorType>(op->getResult(0).getType());
     SmallVector<int64_t> originalSize =
         *cast<VectorUnrollOpInterface>(op).getShapeForUnroll();
-    // Bail-out if rank(source) != rank(target). The main limitation here is the
-    // fact that `ExtractStridedSlice` requires the rank for the input and
-    // output to match. If needed, we can relax this later.
-    if (originalSize.size() != targetShape->size())
-      return rewriter.notifyMatchFailure(
-          op, "expected input vector rank to match target shape rank");
+    int64_t originalShapeRank = originalSize.size();
+
     Location loc = op->getLoc();
+
+    // Handle rank mismatch by adding leading unit dimensions to targetShape
+    SmallVector<int64_t> adjustedTargetShape(originalShapeRank);
+    int64_t rankDiff = originalShapeRank - targetShapeRank;
+    std::fill(adjustedTargetShape.begin(),
+              adjustedTargetShape.begin() + rankDiff, 1);
+    std::copy(targetShape->begin(), targetShape->end(),
+              adjustedTargetShape.begin() + rankDiff);
+
+    int64_t adjustedTargetShapeRank = adjustedTargetShape.size();
     // Prepare the result vector.
     Value result = arith::ConstantOp::create(rewriter, loc, dstVecType,
                                              rewriter.getZeroAttr(dstVecType));
-    SmallVector<int64_t> strides(targetShape->size(), 1);
-    VectorType newVecType =
+    SmallVector<int64_t> strides(adjustedTargetShapeRank, 1);
+    VectorType unrolledVecType =
         VectorType::get(*targetShape, dstVecType.getElementType());
 
     // Create the unrolled computation.
     for (SmallVector<int64_t> offsets :
-         StaticTileOffsetRange(originalSize, *targetShape)) {
+         StaticTileOffsetRange(originalSize, adjustedTargetShape)) {
       SmallVector<Value> extractOperands;
       for (OpOperand &operand : op->getOpOperands()) {
         auto vecType = dyn_cast<VectorType>(operand.get().getType());
@@ -492,14 +499,31 @@ struct UnrollElementwisePattern : public RewritePattern {
           extractOperands.push_back(operand.get());
           continue;
         }
-        extractOperands.push_back(
-            rewriter.createOrFold<vector::ExtractStridedSliceOp>(
-                loc, operand.get(), offsets, *targetShape, strides));
+        Value extracted = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
+            loc, operand.get(), offsets, adjustedTargetShape, strides);
+
+        // Reshape to remove leading unit dims if needed
+        if (adjustedTargetShapeRank > targetShapeRank) {
+          extracted = rewriter.createOrFold<vector::ShapeCastOp>(
+              loc, VectorType::get(*targetShape, vecType.getElementType()),
+              extracted);
+        }
+        extractOperands.push_back(extracted);
       }
+
       Operation *newOp = cloneOpWithOperandsAndTypes(
-          rewriter, loc, op, extractOperands, newVecType);
+          rewriter, loc, op, extractOperands, unrolledVecType);
+
+      Value computeResult = newOp->getResult(0);
+
+      // Use strides sized to targetShape for proper insertion
+      SmallVector<int64_t> insertStrides =
+          (adjustedTargetShapeRank > targetShapeRank)
+              ? SmallVector<int64_t>(targetShapeRank, 1)
+              : strides;
+
       result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
-          loc, newOp->getResult(0), result, offsets, strides);
+          loc, computeResult, result, offsets, insertStrides);
     }
     rewriter.replaceOp(op, result);
     return success();
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 81b5788..e0a8ac4 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -20,8 +20,8 @@
 
 #define DEBUG_TYPE "xegpu"
 
-namespace mlir {
-namespace xegpu {
+using namespace mlir;
+using namespace mlir::xegpu;
 
 static bool isSharedMemory(const MemRefType &memrefTy) {
   Attribute attr = memrefTy.getMemorySpace();
@@ -1133,9 +1133,6 @@ LogicalResult MemDescSubviewOp::verify() {
   return success();
 }
 
-} // namespace xegpu
-} // namespace mlir
-
 namespace mlir {
 #include <mlir/Dialect/XeGPU/IR/XeGPUAttrInterface.cpp.inc>
 } // namespace mlir
diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index 776b5c6..4d81918 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -378,8 +378,10 @@ struct SourceMgrDiagnosticHandlerImpl {
     }
 
     // Otherwise, try to load the source file.
-    std::string ignored;
-    unsigned id = mgr.AddIncludeFile(std::string(filename), SMLoc(), ignored);
+    auto bufferOrErr = llvm::MemoryBuffer::getFile(filename);
+    if (!bufferOrErr)
+      return 0;
+    unsigned id = mgr.AddNewSourceBuffer(std::move(*bufferOrErr), SMLoc());
     filenameToBufId[filename] = id;
     return id;
   }
diff --git a/mlir/lib/TableGen/CodeGenHelpers.cpp b/mlir/lib/TableGen/CodeGenHelpers.cpp
index cb90ef8..d52d5e7 100644
--- a/mlir/lib/TableGen/CodeGenHelpers.cpp
+++ b/mlir/lib/TableGen/CodeGenHelpers.cpp
@@ -49,9 +49,7 @@ StaticVerifierFunctionEmitter::StaticVerifierFunctionEmitter(
     raw_ostream &os, const RecordKeeper &records, StringRef tag)
     : os(os), uniqueOutputLabel(getUniqueOutputLabel(records, tag)) {}
 
-void StaticVerifierFunctionEmitter::emitOpConstraints(
-    ArrayRef<const Record *> opDefs) {
-  NamespaceEmitter namespaceEmitter(os, Operator(*opDefs[0]).getCppNamespace());
+void StaticVerifierFunctionEmitter::emitOpConstraints() {
   emitTypeConstraints();
   emitAttrConstraints();
   emitPropConstraints();
diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp
index c883baa..3236b4f 100644
--- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp
+++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Parser.h"
 #include <optional>
@@ -828,6 +829,7 @@ LogicalResult Parser::parseTdInclude(StringRef filename, llvm::SMRange fileLoc,
   llvm::SourceMgr tdSrcMgr;
   tdSrcMgr.AddNewSourceBuffer(std::move(*includeBuffer), SMLoc());
   tdSrcMgr.setIncludeDirs(parserSrcMgr.getIncludeDirs());
+  tdSrcMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
 
   // This class provides a context argument for the llvm::SourceMgr diagnostic
   // handler.
diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
index 60b9567..1dbe7eca 100644
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LSP/Logging.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include <optional>
 
 using namespace mlir;
@@ -402,6 +403,7 @@ PDLDocument::PDLDocument(const llvm::lsp::URIForFile &uri, StringRef contents,
   llvm::append_range(includeDirs, extraDirs);
 
   sourceMgr.setIncludeDirs(includeDirs);
+  sourceMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
   sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
 
   astContext.getDiagEngine().setHandlerFn([&](const ast::Diagnostic &diag) {
diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp
index 3080b78..2d817be 100644
--- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp
+++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Support/LSP/Logging.h"
 #include "llvm/Support/LSP/Protocol.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TableGen/Parser.h"
 #include "llvm/TableGen/Record.h"
 #include <optional>
@@ -448,6 +449,7 @@ void TableGenTextFile::initialize(
     return;
   }
   sourceMgr.setIncludeDirs(includeDirs);
+  sourceMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
   sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
 
   // This class provides a context argument for the SourceMgr diagnostic
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2d33888..d669a3b 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -76,6 +76,18 @@ func.func @broadcast_vec1d_from_f32(%arg0: f32) -> vector<2xf32> {
 
 // -----
 
+func.func @broadcast_single_elem_vec1d_from_f32(%arg0: f32) -> vector<1xf32> {
+  %0 = vector.broadcast %arg0 : f32 to vector<1xf32>
+  return %0 : vector<1xf32>
+}
+// CHECK-LABEL: @broadcast_single_elem_vec1d_from_f32
+// CHECK-SAME:  %[[A:.*]]: f32)
+// CHECK:       %[[T0:.*]] = llvm.insertelement %[[A]]
+// CHECK-NOT:   llvm.shufflevector
+// CHECK:       return %[[T0]] : vector<1xf32>
+
+// -----
+
 func.func @broadcast_vec1d_from_f32_scalable(%arg0: f32) -> vector<[2]xf32> {
   %0 = vector.broadcast %arg0 : f32 to vector<[2]xf32>
   return %0 : vector<[2]xf32>
diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
index 0b150e9..9c552d8 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
@@ -14,19 +14,36 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>)
   // CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64
   // CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1>
   // CHECK: %[[VAR6:.*]] = scf.if %[[VAR2]] -> (f16) {
-  // CHECK:   %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> vector<1xf16>
-  // CHECK:   %[[VAR8:.*]] = vector.extract %[[VAR7]][0] : f16 from vector<1xf16>
-  // CHECK:   scf.yield %[[VAR8]] : f16
-  // CHECK: } else {
-  // CHECK:   %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf16>
-  // CHECK:   %[[VAR7:.*]] = vector.extract %[[CST_0]][0] : f16 from vector<1xf16>
+  // CHECK:   %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> f16
   // CHECK:   scf.yield %[[VAR7]] : f16
+  // CHECK: } else {
+  // CHECK:   %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16
+  // CHECK:   scf.yield %[[CST_0]] : f16
   // CHECK: }
   %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
       : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
   gpu.return
 }
 }
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: @source_materialize_single_elem_vec
+// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16>
+gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>) {
+  %1 = arith.constant dense<1>: vector<1xi1>
+  %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+      : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
+  // CHECK: %[[VAR_IF:.*]] = scf.if
+  // CHECK: %[[VAR_RET:.*]] = vector.broadcast %[[VAR_IF]] : f16 to vector<1xf16>
+  // CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: vector.store %[[VAR_RET]], %[[ARG2]][%[[C0]]] : memref<1xf16>, vector<1xf16>
+  %c0 = arith.constant 0 : index
+  vector.store %3, %dst[%c0] : memref<1xf16>, vector<1xf16>
+  gpu.return
+}
+}
+
 // -----
 
 gpu.module @test {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
index e2ab876..b52612d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
@@ -24,10 +24,46 @@
     // CHECK-NOT: copy
     // CHECK: %[[call:.*]]:2 = call @inner_func(%[[arg0]])
     %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
-    // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32,{{.*}}>
+    // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32{{.*}}>
     return %1, %0 : f32, tensor<?xf32>
   }
   "test.finish" () : () -> ()
 }) : () -> ()
 
+// -----
 
+#enc1 = #test.tensor_encoding<"hello">
+#enc2 = #test.tensor_encoding<"not hello">
+
+"test.symbol_scope_isolated"() ({
+  // CHECK: func @inner_func(
+  // CHECK-SAME:  %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+  // CHECK-SAME:  -> memref<?xf32, #test.memref_layout<"hello">>
+  func.func @inner_func(%t: tensor<?xf32, #enc1>)
+      -> tensor<?xf32, #enc1> {
+    // CHECK: return %[[arg0]]
+    return %t : tensor<?xf32, #enc1>
+  }
+
+  // CHECK: func @outer_func(
+  // CHECK-SAME:  %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+  // CHECK-SAME:  -> (memref<?xf32, #test.memref_layout<"hello">>,
+  // CHECK-SAME:      memref<?xf32, #test.memref_layout<"not hello">>)
+  func.func @outer_func(%t0: tensor<?xf32, #enc1>)
+      -> (tensor<?xf32, #enc1>, tensor<?xf32, #enc2>) {
+    // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
+    %0 = call @inner_func(%t0)
+      : (tensor<?xf32, #enc1>) -> (tensor<?xf32, #enc1>)
+
+    // CHECK: %[[local:.*]] = "test.create_memref_op"() : ()
+    // CHECK-SAME:  -> memref<?xf32, #test.memref_layout<"not hello">>
+    %local = "test.create_tensor_op"() : () -> tensor<?xf32, #enc2>
+    // CHECK: %[[dummy:.*]] = "test.dummy_memref_op"(%[[local]])
+    %1 = "test.dummy_tensor_op"(%local) : (tensor<?xf32, #enc2>)
+      -> tensor<?xf32, #enc2>
+
+    // CHECK: return %[[call]], %[[dummy]]
+    return %0, %1 : tensor<?xf32, #enc1>, tensor<?xf32, #enc2>
+  }
+  "test.finish" () : () -> ()
+}) : () -> ()
diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir
index 8accf6e..755e3a3 100644
--- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir
+++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir
@@ -235,6 +235,17 @@ llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr {
 
 // -----
 
+// CHECK-LABEL: fold_shufflevector
+// CHECK-SAME: %[[ARG1:[[:alnum:]]+]]: vector<1xf32>, %[[ARG2:[[:alnum:]]+]]: vector<1xf32>
+llvm.func @fold_shufflevector(%v1 : vector<1xf32>, %v2 : vector<1xf32>) -> vector<1xf32> {
+  // CHECK-NOT: llvm.shufflevector
+  %c = llvm.shufflevector %v1, %v2 [0] : vector<1xf32>
+  // CHECK: llvm.return %[[ARG1]]
+  llvm.return %c : vector<1xf32>
+}
+
+// -----
+
 // Check that LLVM constants participate in cross-dialect constant folding. The
 // resulting constant is created in the arith dialect because the last folded
 // operation belongs to it.
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 358bd33..242c04f 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -1035,6 +1035,20 @@ llvm.func @rocdl.s.wait.expcnt() {
   llvm.return
 }
 
+llvm.func @rocdl.s.wait.asynccnt() {
+  // CHECK-LABEL: rocdl.s.wait.asynccnt
+  // CHECK: rocdl.s.wait.asynccnt 0
+  rocdl.s.wait.asynccnt 0
+  llvm.return
+}
+
+llvm.func @rocdl.s.wait.tensorcnt() {
+  // CHECK-LABEL: rocdl.s.wait.tensorcnt
+  // CHECK: rocdl.s.wait.tensorcnt 0
+  rocdl.s.wait.tensorcnt 0
+  llvm.return
+}
+
 // -----
 
 llvm.func @rocdl.readfirstlane(%src : f32) -> f32 {
diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
new file mode 100644
index 0000000..023a0e5
--- /dev/null
+++ b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
@@ -0,0 +1,311 @@
+// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s
+
+///===----------------------------------------------===//
+///  Tests of `StepCompareFolder`
+///===----------------------------------------------===//
+
+
+///===------------------------------------===//
+///  Tests of `ugt` (unsigned greater than)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ugt_constant_3_lhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // 3 > [0, 1, 2] => [true, true, true] => true for all indices => fold
+  %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_2_lhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_2_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // 2 > [0, 1, 2] => [true, true, false] => not same for all indices => don't fold
+  %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_3_rhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] > 3 => [false, false, false] => false for all indices => fold
+  %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_max_rhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_max_rhs() -> vector<3xi1> {
+  // The largest i64 possible:
+  %cst = arith.constant dense<0x7fffffffffffffff> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ugt, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_2_rhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_2_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] > 2 => [false, false, false] => false for all indices => fold
+  %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_1_rhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_1_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<1> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] > 1 => [false, false, true] => not same for all indices => don't fold
+  %1 = arith.cmpi ugt, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+///  Tests of `uge` (unsigned greater than or equal)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @uge_constant_2_lhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_2_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // 2 >= [0, 1, 2] => [true, true, true] => true for all indices => fold
+  %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_1_lhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_uge_constant_1_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<1> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // 1 >= [0, 1, 2] => [true, false, false] => not same for all indices => don't fold
+  %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @uge_constant_3_rhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_3_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] >= 3 => [false, false, false] => false for all indices => fold
+  %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_2_rhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_uge_constant_2_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] >= 2 => [false, false, true] => not same for all indices => don't fold
+  %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+
+///===------------------------------------===//
+///  Tests of `ult` (unsigned less than)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @ult_constant_2_lhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_2_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // 2 < [0, 1, 2] => [false, false, false] => false for all indices => fold
+  %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_1_lhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ult_constant_1_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<1> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // 1 < [0, 1, 2] => [false, false, true] => not same for all indices => don't fold
+  %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ult_constant_3_rhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_3_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] < 3 => [true, true, true] => true for all indices => fold
+  %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_2_rhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ult_constant_2_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  // [0, 1, 2] < 2 => [true, true, false] => not same for all indices => don't fold
+  %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+///  Tests of `ule` (unsigned less than or equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ule_constant_3_lhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_3_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_2_lhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ule_constant_2_lhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ule_constant_2_rhs
+//       CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_2_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ule, %0, %cst : vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_1_rhs
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ule_constant_1_rhs() -> vector<3xi1> {
+  %cst = arith.constant dense<1> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ule, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+///  Tests of `eq` (equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @eq_constant_3
+//       CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @eq_constant_3() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi eq, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_eq_constant_2
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_eq_constant_2() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi eq, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+///  Tests of `ne` (not equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ne_constant_3
+//       CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+//       CHECK: return %[[CST]] : vector<3xi1>
+func.func @ne_constant_3() -> vector<3xi1> {
+  %cst = arith.constant dense<3> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ne, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ne_constant_2
+//       CHECK: %[[CMP:.*]] = arith.cmpi
+//       CHECK: return %[[CMP]]
+func.func @negative_ne_constant_2() -> vector<3xi1> {
+  %cst = arith.constant dense<2> : vector<3xindex>
+  %0 = vector.step : vector<3xindex>
+  %1 = arith.cmpi ne, %0, %cst: vector<3xindex>
+  return %1 : vector<3xi1>
+}
+
diff --git a/mlir/test/Dialect/Vector/vector-unroll-options.mlir b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
index 35db14e..e5a98b5 100644
--- a/mlir/test/Dialect/Vector/vector-unroll-options.mlir
+++ b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
@@ -188,15 +188,38 @@ func.func @vector_fma(%a: vector<4x4xf32>, %b: vector<4x4xf32>, %c: vector<4x4xf
 //   CHECK-LABEL: func @vector_fma
 // CHECK-COUNT-4: vector.fma %{{.+}}, %{{.+}}, %{{.+}} : vector<2x2xf32>
 
-// TODO: We should be able to unroll this like the example above - this will require extending UnrollElementwisePattern.
-func.func @negative_vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
+func.func @vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
   %0 = vector.fma %a, %a, %a : vector<3x2x2xf32>
   return %0 : vector<3x2x2xf32>
 }
-// CHECK-LABEL: func @negative_vector_fma_3d
-//   CHECK-NOT: vector.extract_strided_slice
-//       CHECK: %[[R0:.*]] = vector.fma %{{.+}} : vector<3x2x2xf32>
-//       CHECK: return
+// CHECK-LABEL: func @vector_fma_3d
+//  CHECK-SAME:   (%[[SRC:.*]]: vector<3x2x2xf32>) -> vector<3x2x2xf32> {
+//       CHECK:   %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<3x2x2xf32>
+//       CHECK:   %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_OUT_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_OUT_0:.*]] = vector.shape_cast %[[E_OUT_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[FMA0:.*]] = vector.fma %[[S_LHS_0]], %[[S_RHS_0]], %[[S_OUT_0]] : vector<2x2xf32>
+//       CHECK:   %[[I0:.*]] = vector.insert_strided_slice %[[FMA0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+//       CHECK:   %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_OUT_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_OUT_1:.*]] = vector.shape_cast %[[E_OUT_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[FMA1:.*]] = vector.fma %[[S_LHS_1]], %[[S_RHS_1]], %[[S_OUT_1]] : vector<2x2xf32>
+//       CHECK:   %[[I1:.*]] = vector.insert_strided_slice %[[FMA1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+//       CHECK:   %[[E_LHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_LHS_2:.*]] = vector.shape_cast %[[E_LHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_RHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_RHS_2:.*]] = vector.shape_cast %[[E_RHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_OUT_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_OUT_2:.*]] = vector.shape_cast %[[E_OUT_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[FMA2:.*]] = vector.fma %[[S_LHS_2]], %[[S_RHS_2]], %[[S_OUT_2]] : vector<2x2xf32>
+//       CHECK:   %[[I2:.*]] = vector.insert_strided_slice %[[FMA2]], %[[I1]] {offsets = [2, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+//       CHECK:   return %[[I2]] : vector<3x2x2xf32>
 
 func.func @vector_multi_reduction(%v : vector<4x6xf32>, %acc: vector<4xf32>) -> vector<4xf32> {
   %0 = vector.multi_reduction #vector.kind<add>, %v, %acc [1] : vector<4x6xf32> to vector<4xf32>
@@ -440,3 +463,36 @@ func.func @vector_step() -> vector<32xindex> {
 // CHECK: %[[ADD3:.*]] = arith.addi %[[STEP]], %[[CST]] : vector<8xindex>
 // CHECK: %[[INS3:.*]] = vector.insert_strided_slice %[[ADD3]], %[[INS2]] {offsets = [24], strides = [1]} : vector<8xindex> into vector<32xindex>
 // CHECK: return %[[INS3]] : vector<32xindex>
+
+
+func.func @elementwise_3D_to_2D(%v1: vector<2x2x2xf32>, %v2: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+  %0 = arith.addf %v1, %v2 : vector<2x2x2xf32>
+  return %0 : vector<2x2x2xf32>
+}
+// CHECK-LABEL: func @elementwise_3D_to_2D
+//  CHECK-SAME: (%[[ARG0:.*]]: vector<2x2x2xf32>, %[[ARG1:.*]]: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+//       CHECK:   %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2x2xf32>
+//       CHECK:   %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[ADD0:.*]] = arith.addf %[[S_LHS_0]], %[[S_RHS_0]] : vector<2x2xf32>
+//       CHECK:   %[[I0:.*]] = vector.insert_strided_slice %[[ADD0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+//       CHECK:   %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+//       CHECK:   %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+//       CHECK:   %[[ADD1:.*]] = arith.addf %[[S_LHS_1]], %[[S_RHS_1]] : vector<2x2xf32>
+//       CHECK:   %[[I1:.*]] = vector.insert_strided_slice %[[ADD1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+//       CHECK:   return %[[I1]] : vector<2x2x2xf32>
+
+
+func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf32>) -> vector<2x2x2x2xf32> {
+  %0 = arith.addf %v1, %v2 : vector<2x2x2x2xf32>
+  return %0 : vector<2x2x2x2xf32>
+}
+
+// CHECK-LABEL: func @elementwise_4D_to_2D
+// CHECK-COUNT-4:   arith.addf %{{.*}}, %{{.*}} : vector<2x2xf32>
+// CHECK-NOT: arith.addf
+// CHECK: return
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index bb76392..401cdd29 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -1925,3 +1925,22 @@ func.func @warp_scf_if_distribute(%pred : i1)  {
 //       CHECK-PROP:    "some_use"(%[[IF_YIELD_DIST]]) : (vector<1xf32>) -> ()
 //       CHECK-PROP:    return
 //       CHECK-PROP:  }
+
+// -----
+func.func @dedup_unused_result(%laneid : index) -> (vector<1xf32>) {
+  %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] ->
+    (vector<1xf32>, vector<2xf32>, vector<1xf32>) {
+    %2 = "some_def"() : () -> (vector<32xf32>)
+    %3 = "some_def"() : () -> (vector<64xf32>)
+    gpu.yield %2, %3, %2 : vector<32xf32>, vector<64xf32>, vector<32xf32>
+  }
+  %r0 = "some_use"(%r#2, %r#2) : (vector<1xf32>, vector<1xf32>) -> (vector<1xf32>)
+  return %r0 : vector<1xf32>
+}
+
+// CHECK-PROP: func @dedup_unused_result
+// CHECK-PROP: %[[R:.*]] = gpu.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>)
+// CHECK-PROP:   %[[Y0:.*]] = "some_def"() : () -> vector<32xf32>
+// CHECK-PROP:   %[[Y1:.*]] = "some_def"() : () -> vector<64xf32>
+// CHECK-PROP:   gpu.yield %[[Y0]] : vector<32xf32>
+// CHECK-PROP: "some_use"(%[[R]], %[[R]]) : (vector<1xf32>, vector<1xf32>) -> vector<1xf32>
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 78e1e659..6cccfe4 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -567,3 +567,25 @@ llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_typ
   %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
   llvm.return
 }
+
+// -----
+
+// Test that ensures invalid row/col layouts for matrices A and B are not accepted
+llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> {
+  // expected-error@+1 {{Only m8n8k4 with f16 supports other layouts.}}
+  %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
+    {layoutA = #nvvm.mma_layout<col>, layoutB = #nvvm.mma_layout<col>,
+     multiplicandAPtxType = #nvvm.mma_type<s4>, multiplicandBPtxType = #nvvm.mma_type<s4>,
+     intOverflowBehavior=#nvvm.mma_int_overflow<satfinite>,
+     shape = #nvvm.shape<m = 16, n = 8, k = 32>} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)>
+  llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)>
+}
+
+// -----
+
+// Test for range validation - invalid range where lower == upper but not at extremes
+func.func @invalid_range_equal_bounds() {
+  // expected-error @below {{invalid range attribute: Lower == Upper, but they aren't min (0) or max (4294967295) value! This is an invalid constant range.}}
+  %0 = nvvm.read.ptx.sreg.warpsize range <i32, 32, 32> : i32
+  return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 00a479d..594ae48 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -152,6 +152,10 @@ llvm.func @nvvm_special_regs() -> i32 {
   %74 = nvvm.read.ptx.sreg.lanemask.ge : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
   %75 = nvvm.read.ptx.sreg.lanemask.gt : i32
+  // CHECK: %76 = call range(i32 0, 0) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %76 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 0> : i32
+  // CHECK: %77 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %77 = nvvm.read.ptx.sreg.tid.x range <i32, 4294967295, 4294967295> : i32
   llvm.return %1 : i32
 }
 
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index fdd2c91..6536fac 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -276,6 +276,20 @@ llvm.func @rocdl.s.wait.expcnt() {
   llvm.return
 }
 
+llvm.func @rocdl.s.wait.asynccnt() {
+  // CHECK-LABEL: rocdl.s.wait.asynccnt
+  // CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
+  rocdl.s.wait.asynccnt 0
+  llvm.return
+}
+
+llvm.func @rocdl.s.wait.tensorcnt() {
+  // CHECK-LABEL: rocdl.s.wait.tensorcnt
+  // CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
+  rocdl.s.wait.tensorcnt 0
+  llvm.return
+}
+
 llvm.func @rocdl.setprio() {
   // CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
   rocdl.s.setprio 0
diff --git a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
index 1e2d4a7..4069a74 100644
--- a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
+++ b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
@@ -11,11 +11,25 @@
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
 #include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Pass/Pass.h"
 
+#include "TestAttributes.h" // TestTensorEncodingAttr, TestMemRefLayoutAttr
+#include "TestDialect.h"
+
 using namespace mlir;
 
 namespace {
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+  if (auto encoding = dyn_cast_if_present<test::TestTensorEncodingAttr>(
+          tensorType.getEncoding())) {
+    return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+        tensorType.getContext(), encoding.getDummy()));
+  }
+  return {};
+}
+
 struct TestOneShotModuleBufferizePass
     : public PassWrapper<TestOneShotModuleBufferizePass, OperationPass<>> {
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestOneShotModuleBufferizePass)
@@ -25,6 +39,7 @@ struct TestOneShotModuleBufferizePass
       : PassWrapper(pass) {}
 
   void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<test::TestDialect>();
     registry.insert<bufferization::BufferizationDialect>();
   }
   StringRef getArgument() const final {
@@ -41,6 +56,17 @@ struct TestOneShotModuleBufferizePass
     bufferization::OneShotBufferizationOptions opt;
 
     opt.bufferizeFunctionBoundaries = true;
+    opt.functionArgTypeConverterFn =
+        [&](bufferization::TensorLikeType tensor, Attribute memSpace,
+            func::FuncOp, const bufferization::BufferizationOptions &) {
+          assert(isa<RankedTensorType>(tensor) && "tests only builtin tensors");
+          auto tensorType = cast<RankedTensorType>(tensor);
+          auto layout = getMemRefLayoutForTensorEncoding(tensorType);
+          return cast<bufferization::BufferLikeType>(
+              MemRefType::get(tensorType.getShape(),
+                              tensorType.getElementType(), layout, memSpace));
+        };
+
     bufferization::BufferizationState bufferizationState;
 
     if (failed(bufferization::runOneShotModuleBufferize(getOperation(), opt,
diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
index 5685004..9e7e4f8 100644
--- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td
+++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
@@ -22,6 +22,7 @@ include "mlir/IR/AttrTypeBase.td"
 include "mlir/IR/BuiltinAttributeInterfaces.td"
 include "mlir/IR/EnumAttr.td"
 include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/TensorEncoding.td"
 
 // All of the attributes will extend this class.
 class Test_Attr<string name, list<Trait> traits = []>
@@ -439,4 +440,20 @@ def TestCustomStorageCtorAttr : Test_Attr<"TestCustomStorageCtorAttr"> {
     let hasStorageCustomConstructor = 1;
 }
 
+def TestTensorEncodingAttr : Test_Attr<"TestTensorEncoding",
+    [DeclareAttrInterfaceMethods<VerifiableTensorEncoding>]> {
+  let mnemonic = "tensor_encoding";
+
+  let parameters = (ins "mlir::StringAttr":$dummy);
+  let assemblyFormat = "`<` $dummy `>`";
+}
+
+def TestMemRefLayoutAttr : Test_Attr<"TestMemRefLayout",
+    [DeclareAttrInterfaceMethods<MemRefLayoutAttrInterface>]> {
+  let mnemonic = "memref_layout";
+
+  let parameters = (ins "mlir::StringAttr":$dummy);
+  let assemblyFormat = "`<` $dummy `>`";
+}
+
 #endif // TEST_ATTRDEFS
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
index fe1e916..9db7b01 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
@@ -542,6 +542,24 @@ test::detail::TestCustomStorageCtorAttrAttrStorage::construct(
 }
 
 //===----------------------------------------------------------------------===//
+// TestTensorEncodingAttr
+//===----------------------------------------------------------------------===//
+
+::llvm::LogicalResult TestTensorEncodingAttr::verifyEncoding(
+    mlir::ArrayRef<int64_t> shape, mlir::Type elementType,
+    llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) const {
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// TestMemRefLayoutAttr
+//===----------------------------------------------------------------------===//
+
+mlir::AffineMap TestMemRefLayoutAttr::getAffineMap() const {
+  return mlir::AffineMap::getMultiDimIdentityMap(1, getContext());
+}
+
+//===----------------------------------------------------------------------===//
 // TestDialect
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.h b/mlir/test/lib/Dialect/Test/TestAttributes.h
index 778d84fa..0ad5ab6 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.h
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.h
@@ -24,6 +24,7 @@
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/DialectResourceBlobManager.h"
+#include "mlir/IR/TensorEncoding.h"
 
 // generated files require above includes to come first
 #include "TestAttrInterfaces.h.inc"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h
index f2adca6..bcf3b55d 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.h
+++ b/mlir/test/lib/Dialect/Test/TestDialect.h
@@ -18,6 +18,7 @@
 #include "TestInterfaces.h"
 #include "TestTypes.h"
 #include "mlir/Bytecode/BytecodeImplementation.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/DLTI/Traits.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.td b/mlir/test/lib/Dialect/Test/TestDialect.td
index 2b5491f..37a263f 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.td
+++ b/mlir/test/lib/Dialect/Test/TestDialect.td
@@ -24,7 +24,10 @@ def Test_Dialect : Dialect {
   let useDefaultTypePrinterParser = 0;
   let useDefaultAttributePrinterParser = 1;
   let isExtensible = 1;
-  let dependentDialects = ["::mlir::DLTIDialect"];
+  let dependentDialects = [
+    "::mlir::DLTIDialect",
+    "::mlir::bufferization::BufferizationDialect"
+  ];
   let discardableAttrs = (ins
      "mlir::IntegerAttr":$discardable_attr_key,
      "SimpleAAttr":$other_discardable_attr_key
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 53055fe..b211e24 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1425,6 +1425,39 @@ TestMultiSlotAlloca::handleDestructuringComplete(
   return createNewMultiAllocaWithoutSlot(slot, builder, *this);
 }
 
+namespace {
+/// Returns test dialect's memref layout for test dialect's tensor encoding when
+/// applicable.
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+  if (auto encoding =
+          dyn_cast<test::TestTensorEncodingAttr>(tensorType.getEncoding())) {
+    return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+        tensorType.getContext(), encoding.getDummy()));
+  }
+  return {};
+}
+
+/// Auxiliary bufferization function for test and builtin tensors.
+bufferization::BufferLikeType
+convertTensorToBuffer(mlir::Operation *op,
+                      const bufferization::BufferizationOptions &options,
+                      bufferization::TensorLikeType tensorLike) {
+  auto buffer =
+      *tensorLike.getBufferType(options, [&]() { return op->emitError(); });
+  if (auto memref = dyn_cast<MemRefType>(buffer)) {
+    // Note: For the sake of testing, we want to ensure that encoding -> layout
+    // bufferization happens. This is currently achieved manually.
+    auto layout =
+        getMemRefLayoutForTensorEncoding(cast<RankedTensorType>(tensorLike));
+    return cast<bufferization::BufferLikeType>(
+        MemRefType::get(memref.getShape(), memref.getElementType(), layout,
+                        memref.getMemorySpace()));
+  }
+  return buffer;
+}
+} // namespace
+
 ::mlir::LogicalResult test::TestDummyTensorOp::bufferize(
     ::mlir::RewriterBase &rewriter,
     const ::mlir::bufferization::BufferizationOptions &options,
@@ -1435,8 +1468,8 @@ TestMultiSlotAlloca::handleDestructuringComplete(
     return failure();
 
   const auto outType = getOutput().getType();
-  const auto bufferizedOutType = test::TestMemrefType::get(
-      getContext(), outType.getShape(), outType.getElementType(), nullptr);
+  const auto bufferizedOutType =
+      convertTensorToBuffer(getOperation(), options, outType);
   // replace op with memref analogy
   auto dummyMemrefOp = test::TestDummyMemrefOp::create(
       rewriter, getLoc(), bufferizedOutType, *buffer);
@@ -1470,13 +1503,12 @@ TestMultiSlotAlloca::handleDestructuringComplete(
 
 mlir::FailureOr<mlir::bufferization::BufferLikeType>
 test::TestCreateTensorOp::getBufferType(
-    mlir::Value value, const mlir::bufferization::BufferizationOptions &,
+    mlir::Value value, const mlir::bufferization::BufferizationOptions &options,
     const mlir::bufferization::BufferizationState &,
     llvm::SmallVector<::mlir::Value> &) {
-  const auto type = dyn_cast<test::TestTensorType>(value.getType());
+  const auto type = dyn_cast<bufferization::TensorLikeType>(value.getType());
   if (type == nullptr)
     return failure();
 
-  return cast<mlir::bufferization::BufferLikeType>(test::TestMemrefType::get(
-      getContext(), type.getShape(), type.getElementType(), nullptr));
+  return convertTensorToBuffer(getOperation(), options, type);
 }
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 6329d61..05a33cf 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -32,6 +32,7 @@ include "mlir/Interfaces/MemorySlotInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/ValueBoundsOpInterface.td"
 include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
+include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td"
 
 // Include the attribute definitions.
 include "TestAttrDefs.td"
@@ -2335,7 +2336,7 @@ def SideEffectWithRegionOp : TEST_Op<"side_effect_with_region_op",
 }
 
 //===----------------------------------------------------------------------===//
-// Copy Operation Test 
+// Copy Operation Test
 //===----------------------------------------------------------------------===//
 
 def CopyOp : TEST_Op<"copy", []> {
@@ -3676,10 +3677,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
         ["bufferize", "bufferizesToMemoryRead",
          "bufferizesToMemoryWrite", "getAliasingValues"]>]> {
   let arguments = (ins
-    Arg<TestTensorType>:$input
+    Arg<Bufferization_TensorLikeTypeInterface>:$input
   );
   let results = (outs
-    Arg<TestTensorType>:$output
+    Arg<Bufferization_TensorLikeTypeInterface>:$output
   );
 
   let extraClassDefinition = [{
@@ -3701,10 +3702,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
 
 def TestDummyMemrefOp : TEST_Op<"dummy_memref_op", []> {
   let arguments = (ins
-    Arg<TestMemrefType>:$input
+    Arg<Bufferization_BufferLikeTypeInterface>:$input
   );
   let results = (outs
-    Arg<TestMemrefType>:$output
+    Arg<Bufferization_BufferLikeTypeInterface>:$output
   );
 }
 
@@ -3714,7 +3715,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
          "bufferizesToMemoryWrite", "getAliasingValues",
          "bufferizesToAllocation"]>]> {
   let arguments = (ins);
-  let results = (outs Arg<TestTensorType>:$output);
+  let results = (outs Arg<Bufferization_TensorLikeTypeInterface>:$output);
   let extraClassDefinition = [{
     bool test::TestCreateTensorOp::bufferizesToMemoryRead(::mlir::OpOperand&,
         const ::mlir::bufferization::AnalysisState&) {
@@ -3738,7 +3739,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
 
 def TestCreateMemrefOp : TEST_Op<"create_memref_op"> {
   let arguments = (ins);
-  let results = (outs Arg<TestMemrefType>:$output);
+  let results = (outs Arg<Bufferization_BufferLikeTypeInterface>:$output);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/mlir-tblgen/cpp-class-comments.td b/mlir/test/mlir-tblgen/cpp-class-comments.td
index a896888..9dcf975 100644
--- a/mlir/test/mlir-tblgen/cpp-class-comments.td
+++ b/mlir/test/mlir-tblgen/cpp-class-comments.td
@@ -96,17 +96,14 @@ def EncodingTrait : AttrInterface<"EncodingTrait"> {
   }];
   let methods = [
   ];
-// ATTR-INTERFACE: namespace mlir
-// ATTR-INTERFACE-NEXT: namespace a
-// ATTR-INTERFACE-NEXT: namespace traits
+// ATTR-INTERFACE: namespace mlir::a::traits {
 // ATTR-INTERFACE-NEXT: /// Common trait for all layouts.
 // ATTR-INTERFACE-NEXT: class EncodingTrait;
 }
 
 def SimpleEncodingTrait : AttrInterface<"SimpleEncodingTrait"> {
   let cppNamespace = "a::traits";
-// ATTR-INTERFACE: namespace a {
-// ATTR-INTERFACE-NEXT: namespace traits {
+// ATTR-INTERFACE: namespace a::traits {
 // ATTR-INTERFACE-NEXT: class SimpleEncodingTrait;
 }
 
@@ -116,8 +113,7 @@ def SimpleOpInterface : OpInterface<"SimpleOpInterface"> {
 
     Simple Op Interface description
     }];
-// OP-INTERFACE: namespace a {
-// OP-INTERFACE-NEXT: namespace traits {
+// OP-INTERFACE: namespace a::traits {
 // OP-INTERFACE-NEXT: /// Simple Op Interface description
 // OP-INTERFACE-NEXT: class SimpleOpInterface;
 }
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index f99dcdb..76122a0 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include <set>
 
 using namespace mlir;
@@ -41,6 +42,7 @@ processBuffer(raw_ostream &os, std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
               bool dumpODS, std::set<std::string> *includedFiles) {
   llvm::SourceMgr sourceMgr;
   sourceMgr.setIncludeDirs(includeDirs);
+  sourceMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
   sourceMgr.AddNewSourceBuffer(std::move(chunkBuffer), SMLoc());
 
   // If we are dumping ODS information, also enable documentation to ensure the
diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp
index d55ad482..11bf9ce 100644
--- a/mlir/tools/mlir-tblgen/EnumsGen.cpp
+++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/CodeGenHelpers.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
@@ -701,11 +702,7 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) {
   StringRef underlyingToSymFnName = enumInfo.getUnderlyingToSymbolFnName();
   auto enumerants = enumInfo.getAllCases();
 
-  SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(cppNamespace, namespaces, "::");
-
-  for (auto ns : namespaces)
-    os << "namespace " << ns << " {\n";
+  llvm::NamespaceEmitter ns(os, cppNamespace);
 
   // Emit the enum class definition
   emitEnumClass(enumDef, enumName, underlyingType, description, enumerants, os);
@@ -766,8 +763,7 @@ public:
     os << formatv(attrClassDecl, enumName, attrClassName, baseAttrClassName);
   }
 
-  for (auto ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
+  ns.close();
 
   // Generate a generic parser and printer for the enum.
   std::string qualName =
@@ -790,13 +786,8 @@ static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) {
 
 static void emitEnumDef(const Record &enumDef, raw_ostream &os) {
   EnumInfo enumInfo(enumDef);
-  StringRef cppNamespace = enumInfo.getCppNamespace();
 
-  SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(cppNamespace, namespaces, "::");
-
-  for (auto ns : namespaces)
-    os << "namespace " << ns << " {\n";
+  llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace());
 
   if (enumInfo.isBitEnum()) {
     emitSymToStrFnForBitEnum(enumDef, os);
@@ -810,10 +801,6 @@ static void emitEnumDef(const Record &enumDef, raw_ostream &os) {
 
   if (enumInfo.genSpecializedAttr())
     emitSpecializedAttrDef(enumDef, os);
-
-  for (auto ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
-  os << "\n";
 }
 
 static bool emitEnumDefs(const RecordKeeper &records, raw_ostream &os) {
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index daae3c7..3718648 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -4896,7 +4896,7 @@ static void emitOpClassDefs(const RecordKeeper &records,
                                                       constraintPrefix);
   os << formatv(opCommentHeader, "Local Utility Method", "Definitions");
   staticVerifierEmitter.collectOpConstraints(defs);
-  staticVerifierEmitter.emitOpConstraints(defs);
+  staticVerifierEmitter.emitOpConstraints();
 
   // Emit the classes.
   emitOpClasses(records, defs, os, staticVerifierEmitter,
diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
index 730b5b2..ab8d534 100644
--- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/CodeGenHelpers.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
@@ -342,11 +343,7 @@ void InterfaceGenerator::emitModelDecl(const Interface &interface) {
 }
 
 void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) {
-  llvm::SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
-  for (StringRef ns : namespaces)
-    os << "namespace " << ns << " {\n";
-
+  llvm::NamespaceEmitter ns(os, interface.getCppNamespace());
   for (auto &method : interface.getMethods()) {
     os << "template<typename " << valueTemplate << ">\n";
     emitCPPType(method.getReturnType(), os);
@@ -442,18 +439,11 @@ void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) {
                         method.isStatic() ? &ctx : &nonStaticMethodFmt);
     os << "\n}\n";
   }
-
-  for (StringRef ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
 }
 
 void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) {
-  llvm::SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
-  for (StringRef ns : namespaces)
-    os << "namespace " << ns << " {\n";
-
-  os << "namespace detail {\n";
+  auto cppNamespace = (interface.getCppNamespace() + "::detail").str();
+  llvm::NamespaceEmitter ns(os, cppNamespace);
 
   StringRef interfaceName = interface.getName();
   auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str();
@@ -504,10 +494,6 @@ void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) {
     os << tblgen::tgfmt(*extraTraitDecls, &traitMethodFmt) << "\n";
 
   os << "  };\n";
-  os << "}// namespace detail\n";
-
-  for (StringRef ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
 }
 
 static void emitInterfaceDeclMethods(const Interface &interface,
@@ -533,10 +519,7 @@ static void emitInterfaceDeclMethods(const Interface &interface,
 }
 
 void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) {
-  llvm::SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
-  for (StringRef ns : namespaces)
-    os << "namespace " << ns << " {\n";
+  llvm::NamespaceEmitter ns(os, interface.getCppNamespace());
 
   // Emit a forward declaration of the interface class so that it becomes usable
   // in the signature of its methods.
@@ -545,16 +528,10 @@ void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) {
 
   StringRef interfaceName = interface.getName();
   os << "class " << interfaceName << ";\n";
-
-  for (StringRef ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
 }
 
 void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) {
-  llvm::SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
-  for (StringRef ns : namespaces)
-    os << "namespace " << ns << " {\n";
+  llvm::NamespaceEmitter ns(os, interface.getCppNamespace());
 
   StringRef interfaceName = interface.getName();
   auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str();
@@ -631,9 +608,6 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) {
   }
 
   os << "};\n";
-
-  for (StringRef ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
 }
 
 bool InterfaceGenerator::emitInterfaceDecls() {
diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
index 3ead2f0..ca291b5 100644
--- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
+++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
@@ -259,8 +259,8 @@ static void emitInterfaceDecl(const Availability &availability,
   std::string interfaceTraitsName =
       std::string(formatv("{0}Traits", interfaceName));
 
-  StringRef cppNamespace = availability.getInterfaceClassNamespace();
-  llvm::NamespaceEmitter nsEmitter(os, cppNamespace);
+  llvm::NamespaceEmitter nsEmitter(os,
+                                   availability.getInterfaceClassNamespace());
   os << "class " << interfaceName << ";\n\n";
 
   // Emit the traits struct containing the concept and model declarations.
@@ -418,15 +418,9 @@ static void emitAvailabilityQueryForBitEnum(const Record &enumDef,
 static void emitEnumDecl(const Record &enumDef, raw_ostream &os) {
   EnumInfo enumInfo(enumDef);
   StringRef enumName = enumInfo.getEnumClassName();
-  StringRef cppNamespace = enumInfo.getCppNamespace();
   auto enumerants = enumInfo.getAllCases();
 
-  llvm::SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(cppNamespace, namespaces, "::");
-
-  for (auto ns : namespaces)
-    os << "namespace " << ns << " {\n";
-
+  llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace());
   llvm::StringSet<> handledClasses;
 
   // Place all availability specifications to their corresponding
@@ -441,9 +435,6 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) {
                     enumName);
       handledClasses.insert(className);
     }
-
-  for (auto ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
 }
 
 static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) {
@@ -459,31 +450,19 @@ static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) {
 
 static void emitEnumDef(const Record &enumDef, raw_ostream &os) {
   EnumInfo enumInfo(enumDef);
-  StringRef cppNamespace = enumInfo.getCppNamespace();
-
-  llvm::SmallVector<StringRef, 2> namespaces;
-  llvm::SplitString(cppNamespace, namespaces, "::");
-
-  for (auto ns : namespaces)
-    os << "namespace " << ns << " {\n";
+  llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace());
 
-  if (enumInfo.isBitEnum()) {
+  if (enumInfo.isBitEnum())
     emitAvailabilityQueryForBitEnum(enumDef, os);
-  } else {
+  else
     emitAvailabilityQueryForIntEnum(enumDef, os);
-  }
-
-  for (auto ns : llvm::reverse(namespaces))
-    os << "} // namespace " << ns << "\n";
-  os << "\n";
 }
 
 static bool emitEnumDefs(const RecordKeeper &records, raw_ostream &os) {
   llvm::emitSourceFileHeader("SPIR-V Enum Availability Definitions", os,
                              records);
 
-  auto defs = records.getAllDerivedDefinitions("EnumInfo");
-  for (const auto *def : defs)
+  for (const Record *def : records.getAllDerivedDefinitions("EnumInfo"))
     emitEnumDef(*def, os);
 
   return false;
diff --git a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
index b2e1edf..7a32983f 100644
--- a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
+++ b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
@@ -8,7 +8,7 @@
 // REQUIRES: gpu
 //
 // FIXME: https://github.com/llvm/llvm-project/issues/161265
-// XFAIL: gpu
+// UNSUPPORTED: gpu
 
 #include <stdio.h>
 
diff --git a/offload/test/offloading/barrier_fence.c b/offload/test/offloading/barrier_fence.c
index 73d259d..e43db0a5 100644
--- a/offload/test/offloading/barrier_fence.c
+++ b/offload/test/offloading/barrier_fence.c
@@ -4,6 +4,9 @@
 // RUN: %libomptarget-run-generic
 
 // REQUIRES: gpu
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: gpu
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/offload/test/offloading/interop-print.c b/offload/test/offloading/interop-print.c
new file mode 100644
index 0000000..a386420
--- /dev/null
+++ b/offload/test/offloading/interop-print.c
@@ -0,0 +1,83 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa
+// RUN:   %libomptarget-run-generic 2>&1 | \
+// RUN:   %fcheck-amdgcn-amd-amdhsa -check-prefixes=AMD
+
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda
+// RUN:   %libomptarget-run-generic 2>&1 | \
+// RUN:   %fcheck-nvptx64-nvidia-cuda -check-prefixes=NVIDIA
+
+// REQUIRES: gpu
+// XFAIL: nvptx64-nvidia-cuda
+
+#include <omp.h>
+#include <stdio.h>
+
+const char *interop_int_to_string(const int interop_int) {
+  switch (interop_int) {
+  case 1:
+    return "cuda";
+  case 2:
+    return "cuda_driver";
+  case 3:
+    return "opencl";
+  case 4:
+    return "sycl";
+  case 5:
+    return "hip";
+  case 6:
+    return "level_zero";
+  case 7:
+    return "hsa";
+  default:
+    return "unknown";
+  }
+}
+
+int main(int argc, char **argv) {
+
+  // Loop over all available devices
+  for (int id = 0; id < omp_get_num_devices(); ++id) {
+    omp_interop_t iobj = omp_interop_none;
+
+    // TODO: Change targetsync to target when AMD toolchain supports it.
+#pragma omp interop init(target : iobj) device(id)
+
+    int err;
+    int interop_int = omp_get_interop_int(iobj, omp_ipr_fr_id, &err);
+
+    if (err) {
+      fprintf(stderr, "omp_get_interop_int failed: %d\n", err);
+      return -1;
+    }
+
+    // AMD: {{.*}} hsa
+    // NVIDIA: {{.*}} cuda
+    printf("omp_get_interop_int returned %s\n",
+           interop_int_to_string(interop_int));
+
+    const char *interop_vendor =
+        omp_get_interop_str(iobj, omp_ipr_vendor_name, &err);
+    if (err) {
+      fprintf(stderr, "omp_get_interop_str failed: %d\n", err);
+      return -1;
+    }
+
+    // AMD: {{.*}} amd
+    // NVIDIA: {{.*}} nvidia
+    printf("omp_get_interop_str returned %s\n", interop_vendor);
+
+    const char *interop_fr_name =
+        omp_get_interop_str(iobj, omp_ipr_fr_name, &err);
+    if (err) {
+      fprintf(stderr, "omp_get_interop_str failed: %d\n", err);
+      return -1;
+    }
+
+    // AMD: {{.*}} hsa
+    // NVIDIA: {{.*}} cuda
+    printf("omp_get_interop_str returned %s\n", interop_fr_name);
+
+#pragma omp interop destroy(iobj)
+  }
+  return 0;
+}
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e..3ffec41 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -97,12 +97,15 @@ class kmp_stats_list;
 // OMPD_SKIP_HWLOC used in libompd/omp-icv.cpp to avoid OMPD depending on hwloc
 #if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED && !defined(OMPD_SKIP_HWLOC)
 #include "hwloc.h"
+#define KMP_HWLOC_ENABLED 1
 #ifndef HWLOC_OBJ_NUMANODE
 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 #endif
 #ifndef HWLOC_OBJ_PACKAGE
 #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
 #endif
+#else
+#define KMP_HWLOC_ENABLED 0
 #endif
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -672,10 +675,10 @@ typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
 extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
 #endif /* KMP_OS_WINDOWS */
 
-#if KMP_USE_HWLOC && !defined(OMPD_SKIP_HWLOC)
+#if KMP_HWLOC_ENABLED
 extern hwloc_topology_t __kmp_hwloc_topology;
 extern int __kmp_hwloc_error;
-#endif
+#endif // KMP_HWLOC_ENABLED
 
 extern size_t __kmp_affin_mask_size;
 #define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
@@ -784,10 +787,10 @@ public:
   static void destroy_api();
   enum api_type {
     NATIVE_OS
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
     ,
     HWLOC
-#endif
+#endif // KMP_HWLOC_ENABLED
   };
   virtual api_type get_api_type() const {
     KMP_ASSERT(0);
@@ -856,9 +859,9 @@ enum affinity_top_method {
   affinity_top_method_group,
 #endif /* KMP_GROUP_AFFINITY */
   affinity_top_method_flat,
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   affinity_top_method_hwloc,
-#endif
+#endif // KMP_HWLOC_ENABLED
   affinity_top_method_default
 };
 
@@ -1125,9 +1128,9 @@ typedef struct kmp_allocator_t {
   omp_alloctrait_value_t target_access;
   omp_alloctrait_value_t atomic_scope;
   size_t part_size;
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   omp_alloctrait_value_t membind;
-#endif
+#endif // KMP_HWLOC_ENABLED
 } kmp_allocator_t;
 
 extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
@@ -2087,12 +2090,12 @@ typedef struct dispatch_shared_info {
 #if KMP_USE_HIER_SCHED
   void *hier;
 #endif
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   // When linking with libhwloc, the ORDERED EPCC test slows down on big
   // machines (> 48 cores). Performance analysis showed that a cache thrash
   // was occurring and this padding helps alleviate the problem.
   char padding[64];
-#endif
+#endif // KMP_HWLOC_ENABLED
 } dispatch_shared_info_t;
 
 typedef struct kmp_disp {
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 6bfdfbf..6a0e291 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -19,13 +19,13 @@
 #if KMP_USE_HIER_SCHED
 #include "kmp_dispatch_hier.h"
 #endif
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
 // Copied from hwloc
 #define HWLOC_GROUP_KIND_INTEL_MODULE 102
 #define HWLOC_GROUP_KIND_INTEL_TILE 103
 #define HWLOC_GROUP_KIND_INTEL_DIE 104
 #define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220
-#endif
+#endif // KMP_HWLOC_ENABLED
 #include <ctype.h>
 
 // The machine topology
@@ -1440,7 +1440,7 @@ void KMPAffinity::pick_api() {
   KMPAffinity *affinity_dispatch;
   if (picked_api)
     return;
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   // Only use Hwloc if affinity isn't explicitly disabled and
   // user requests Hwloc topology method
   if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
@@ -1448,7 +1448,7 @@ void KMPAffinity::pick_api() {
     affinity_dispatch = new KMPHwlocAffinity();
     __kmp_hwloc_available = true;
   } else
-#endif
+#endif // KMP_HWLOC_ENABLED
   {
     affinity_dispatch = new KMPNativeAffinity();
   }
@@ -1699,7 +1699,7 @@ kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
 // Original mask is a subset of full mask in multiple processor groups topology
 kmp_affin_mask_t *__kmp_affin_origMask = NULL;
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
 static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
 #if HWLOC_API_VERSION >= 0x00020000
   return hwloc_obj_type_is_cache(obj->type);
@@ -2007,7 +2007,7 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
   __kmp_topology->sort_ids();
   return true;
 }
-#endif // KMP_USE_HWLOC
+#endif // KMP_HWLOC_ENABLED
 
 // If we don't know how to retrieve the machine's processor topology, or
 // encounter an error in doing so, this routine is called to form a "flat"
@@ -4854,7 +4854,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
 // In the default code path, errors are not fatal - we just try using
 // another method. We only emit a warning message if affinity is on, or the
 // verbose flag is set, an the nowarnings flag was not set.
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
     if (!success &&
         __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
       if (!__kmp_hwloc_error) {
@@ -4866,7 +4866,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
         KMP_INFORM(AffIgnoringHwloc, env_var);
       }
     }
-#endif
+#endif // KMP_HWLOC_ENABLED
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
     if (!success) {
@@ -4914,7 +4914,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
 // If the user has specified that a paricular topology discovery method is to be
 // used, then we abort if that method fails. The exception is group affinity,
 // which might have been implicitly set.
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
     KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
     success = __kmp_affinity_create_hwloc_map(&msg_id);
@@ -4923,7 +4923,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
       KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
     }
   }
-#endif // KMP_USE_HWLOC
+#endif // KMP_HWLOC_ENABLED
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
   else if (__kmp_affinity_top_method == affinity_top_method_x2apicid ||
@@ -5322,12 +5322,12 @@ void __kmp_affinity_uninitialize(void) {
     __kmp_free(__kmp_osid_to_hwthread_map);
     __kmp_osid_to_hwthread_map = NULL;
   }
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   if (__kmp_hwloc_topology != NULL) {
     hwloc_topology_destroy(__kmp_hwloc_topology);
     __kmp_hwloc_topology = NULL;
   }
-#endif
+#endif // KMP_HWLOC_ENABLED
   if (__kmp_hw_subset) {
     kmp_hw_subset_t::deallocate(__kmp_hw_subset);
     __kmp_hw_subset = nullptr;
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index dc3191c..fa69585 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -18,7 +18,7 @@
 #include <limits>
 
 #if KMP_AFFINITY_SUPPORTED
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
 class KMPHwlocAffinity : public KMPAffinity {
 public:
   class Mask : public KMPAffinity::Mask {
@@ -109,7 +109,7 @@ public:
       }
       return error;
     }
-#endif
+#endif // KMP_OS_WINDOWS
     int get_proc_group() const override {
       int group = -1;
 #if KMP_OS_WINDOWS
@@ -191,7 +191,7 @@ public:
   }
   api_type get_api_type() const override { return HWLOC; }
 };
-#endif /* KMP_USE_HWLOC */
+#endif /* KMP_HWLOC_ENABLED */
 
 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
     KMP_OS_AIX
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 051f88c..d43daef 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -14,7 +14,7 @@
 #include "kmp_io.h"
 #include "kmp_wrapper_malloc.h"
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
 #if HWLOC_API_VERSION > 0x00020300
 #define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
 #elif HWLOC_API_VERSION == 0x00020300
@@ -26,7 +26,7 @@ enum hwloc_memattr_id_e {
   HWLOC_MEMATTR_ID_CAPACITY
 };
 #endif
-#endif // KMP_USE_HWLOC
+#endif // KMP_HWLOC_ENABLED
 
 // Disable bget when it is not used
 #if KMP_USE_BGET
@@ -1545,7 +1545,7 @@ void __kmp_fini_memkind() {
 #endif
 }
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
 static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
 #if HWLOC_API_VERSION >= 0x00020300
   const hwloc_topology_support *support;
@@ -1561,7 +1561,7 @@ static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
   return false;
 #else
   return false;
-#endif
+#endif // KMP_HWLOC_ENABLED
 }
 
 void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
@@ -1611,7 +1611,7 @@ void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
   return NULL;
 #endif
 }
-#endif // KMP_USE_HWLOC
+#endif // KMP_HWLOC_ENABLED
 
 void __kmp_init_target_mem() {
   *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
@@ -1680,13 +1680,13 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
       al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
       break;
     case omp_atk_partition:
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
       al->membind = (omp_alloctrait_value_t)traits[i].value;
       KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
                        al->membind == omp_atv_nearest ||
                        al->membind == omp_atv_blocked ||
                        al->membind == omp_atv_interleaved);
-#endif
+#endif // KMP_HWLOC_ENABLED
       al->memkind = RCAST(void **, traits[i].value);
       break;
     case omp_atk_pin_device:
@@ -1980,7 +1980,7 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
     }
   }
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   if (__kmp_hwloc_available) {
     if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
       if (allocator < kmp_max_mem_alloc) {
@@ -2074,7 +2074,7 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
       ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
     }
   } else {
-#endif
+#endif // KMP_HWLOC_ENABLED
     if (__kmp_memkind_available) {
       if (allocator < kmp_max_mem_alloc) {
         // pre-defined allocator
@@ -2201,9 +2201,9 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
         KMP_ASSERT(0); // abort fallback requested
       } // no sense to look for another fallback because of same internal alloc
     }
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   }
-#endif
+#endif // KMP_HWLOC_ENABLED
   KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
   if (ptr == NULL)
     return NULL;
@@ -2339,7 +2339,7 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
     kmp_target_unlock_mem(desc.ptr_alloc, device);
   }
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   if (__kmp_hwloc_available) {
     if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
       kmp_uint64 used =
@@ -2349,7 +2349,7 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
     }
     hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
   } else {
-#endif
+#endif // KMP_HWLOC_ENABLED
     if (__kmp_memkind_available) {
       if (oal < kmp_max_mem_alloc) {
         // pre-defined allocator
@@ -2378,9 +2378,9 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
       }
       __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
     }
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   }
-#endif
+#endif // KMP_HWLOC_ENABLED
 }
 
 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
diff --git a/openmp/runtime/src/kmp_dispatch.h b/openmp/runtime/src/kmp_dispatch.h
index cf19eb5..f161a80 100644
--- a/openmp/runtime/src/kmp_dispatch.h
+++ b/openmp/runtime/src/kmp_dispatch.h
@@ -182,12 +182,12 @@ template <typename T> struct dispatch_shared_info_template {
 #if KMP_USE_HIER_SCHED
   kmp_hier_t<T> *hier;
 #endif
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   // When linking with libhwloc, the ORDERED EPCC test slowsdown on big
   // machines (> 48 cores). Performance analysis showed that a cache thrash
   // was occurring and this padding helps alleviate the problem.
   char padding[64];
-#endif
+#endif // KMP_HWLOC_ENABLED
 };
 
 /* ------------------------------------------------------------------------ */
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 323d13e..6c3b576 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -248,10 +248,10 @@ enum mic_type __kmp_mic_type = non_mic;
 
 KMPAffinity *__kmp_affinity_dispatch = NULL;
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
 int __kmp_hwloc_error = FALSE;
 hwloc_topology_t __kmp_hwloc_topology = NULL;
-#endif
+#endif // KMP_HWLOC_ENABLED
 
 #if KMP_OS_WINDOWS
 #if KMP_GROUP_AFFINITY
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b9d615f4..b6e7e9ca 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1069,10 +1069,10 @@ static void __kmp_stg_print_warnings(kmp_str_buf_t *buffer, char const *name,
 static void __kmp_stg_parse_nesting_mode(char const *name, char const *value,
                                          void *data) {
   __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_nesting_mode);
-#if KMP_AFFINITY_SUPPORTED && KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   if (__kmp_nesting_mode > 0)
     __kmp_affinity_top_method = affinity_top_method_hwloc;
-#endif
+#endif // KMP_HWLOC_ENABLED
 } // __kmp_stg_parse_nesting_mode
 
 static void __kmp_stg_print_nesting_mode(kmp_str_buf_t *buffer,
@@ -3301,11 +3301,11 @@ static void __kmp_stg_parse_topology_method(char const *name, char const *value,
   if (__kmp_str_match("all", 1, value)) {
     __kmp_affinity_top_method = affinity_top_method_all;
   }
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   else if (__kmp_str_match("hwloc", 1, value)) {
     __kmp_affinity_top_method = affinity_top_method_hwloc;
   }
-#endif
+#endif // KMP_HWLOC_ENABLED
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
   else if (__kmp_str_match("cpuid_leaf31", 12, value) ||
            __kmp_str_match("cpuid 1f", 8, value) ||
@@ -3409,11 +3409,11 @@ static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer,
     break;
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
   case affinity_top_method_hwloc:
     value = "hwloc";
     break;
-#endif
+#endif // KMP_HWLOC_ENABLED
 
   case affinity_top_method_cpuinfo:
     value = "cpuinfo";
@@ -6277,7 +6277,7 @@ void __kmp_env_initialize(char const *string) {
 #if KMP_AFFINITY_SUPPORTED
 
   if (!TCR_4(__kmp_init_middle)) {
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
     // Force using hwloc when either tiles or numa nodes requested within
     // KMP_HW_SUBSET or granularity setting and no other topology method
     // is requested
@@ -6292,12 +6292,12 @@ void __kmp_env_initialize(char const *string) {
     if (__kmp_affinity.gran == KMP_HW_NUMA ||
         __kmp_affinity.gran == KMP_HW_TILE)
       __kmp_affinity_top_method = affinity_top_method_hwloc;
-#endif
+#endif // KMP_HWLOC_ENABLED
     // Determine if the machine/OS is actually capable of supporting
     // affinity.
     const char *var = "KMP_AFFINITY";
     KMPAffinity::pick_api();
-#if KMP_USE_HWLOC
+#if KMP_HWLOC_ENABLED
     // If Hwloc topology discovery was requested but affinity was also disabled,
     // then tell user that Hwloc request is being ignored and use default
     // topology discovery method.
@@ -6306,7 +6306,7 @@ void __kmp_env_initialize(char const *string) {
       KMP_WARNING(AffIgnoringHwloc, var);
       __kmp_affinity_top_method = affinity_top_method_all;
     }
-#endif
+#endif // KMP_HWLOC_ENABLED
     if (__kmp_affinity.type == affinity_disabled) {
       KMP_AFFINITY_DISABLE();
     } else if (!KMP_AFFINITY_CAPABLE()) {
diff --git a/openmp/tools/omptest/include/InternalEvent.h b/openmp/tools/omptest/include/InternalEvent.h
index 1348c48..743fad9 100644
--- a/openmp/tools/omptest/include/InternalEvent.h
+++ b/openmp/tools/omptest/include/InternalEvent.h
@@ -371,6 +371,7 @@ struct BufferRecordDeallocation : public EventBase<BufferRecordDeallocation> {
 // take precedence over the following default equality operator definition.
 bool operator==(const ParallelBegin &, const ParallelBegin &);
 bool operator==(const Work &, const Work &);
+bool operator==(const Dispatch &, const Dispatch &);
 bool operator==(const ImplicitTask &, const ImplicitTask &);
 bool operator==(const SyncRegion &, const SyncRegion &);
 bool operator==(const Target &, const Target &);
diff --git a/openmp/tools/omptest/src/InternalEventOperators.cpp b/openmp/tools/omptest/src/InternalEventOperators.cpp
index 1ec33d5..dde6409 100644
--- a/openmp/tools/omptest/src/InternalEventOperators.cpp
+++ b/openmp/tools/omptest/src/InternalEventOperators.cpp
@@ -36,6 +36,11 @@ bool operator==(const Work &Expected, const Work &Observed) {
          isSameTaskData && isSameCount;
 }
 
+bool operator==(const Dispatch &Expected, const Dispatch &Observed) {
+  bool isSameKind = (Expected.Kind == Observed.Kind);
+  return isSameKind;
+}
+
 bool operator==(const ImplicitTask &Expected, const ImplicitTask &Observed) {
   bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint);
   bool isSameActualParallelism =
diff --git a/openmp/tools/omptest/test/unittests/internal-event-eq-test.cpp b/openmp/tools/omptest/test/unittests/internal-event-eq-test.cpp
new file mode 100644
index 0000000..d30d6da
--- /dev/null
+++ b/openmp/tools/omptest/test/unittests/internal-event-eq-test.cpp
@@ -0,0 +1,65 @@
+#include "InternalEvent.h"
+#include <omp-tools.h>
+#include <sstream>
+
+#include "gtest/gtest.h"
+
+using namespace omptest;
+
+TEST(InternalEvent_equality_ops, Dispatch_identity) {
+  ompt_data_t DI{.value = 31};
+  internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11,
+                       /*TaskData=*/(ompt_data_t *)0x22,
+                       /*Kind=*/ompt_dispatch_t::ompt_dispatch_iteration,
+                       /*Instance=*/DI};
+
+  EXPECT_EQ(D == D, true);
+}
+
+TEST(InternalEvent_equality_ops, Dispatch_same) {
+  ompt_data_t DI{.ptr = (void *)0x33};
+  internal::Dispatch D1{/*ParallelData=*/(ompt_data_t *)0x11,
+                        /*TaskData=*/(ompt_data_t *)0x22,
+                        /*Kind=*/ompt_dispatch_t::ompt_dispatch_section,
+                        /*Instance=*/DI};
+
+  internal::Dispatch D2{/*ParallelData=*/(ompt_data_t *)0x11,
+                        /*TaskData=*/(ompt_data_t *)0x22,
+                        /*Kind=*/ompt_dispatch_t::ompt_dispatch_section,
+                        /*Instance=*/DI};
+
+  EXPECT_EQ(D1 == D2, true);
+}
+
+TEST(InternalEvent_equality_ops, Dispatch_different_kind) {
+  ompt_data_t DI{.ptr = (void *)0x33};
+  internal::Dispatch D1{/*ParallelData=*/(ompt_data_t *)0x11,
+                        /*TaskData=*/(ompt_data_t *)0x22,
+                        /*Kind=*/ompt_dispatch_t::ompt_dispatch_section,
+                        /*Instance=*/DI};
+
+  internal::Dispatch D2{/*ParallelData=*/(ompt_data_t *)0x11,
+                        /*TaskData=*/(ompt_data_t *)0x22,
+                        /*Kind=*/ompt_dispatch_t::ompt_dispatch_iteration,
+                        /*Instance=*/DI};
+
+  // Demonstrate that 'Kind' is the only relevant field for equality.
+  EXPECT_EQ(D1 == D2, false);
+}
+
+TEST(InternalEvent_equality_ops, Dispatch_same_kind_different_other) {
+  ompt_data_t DI1{.ptr = (void *)0x33};
+  internal::Dispatch D1{/*ParallelData=*/(ompt_data_t *)0x11,
+                        /*TaskData=*/(ompt_data_t *)0x22,
+                        /*Kind=*/ompt_dispatch_t::ompt_dispatch_section,
+                        /*Instance=*/DI1};
+
+  ompt_data_t DI2{.ptr = (void *)0x66};
+  internal::Dispatch D2{/*ParallelData=*/(ompt_data_t *)0x44,
+                        /*TaskData=*/(ompt_data_t *)0x55,
+                        /*Kind=*/ompt_dispatch_t::ompt_dispatch_section,
+                        /*Instance=*/DI2};
+
+  // Demonstrate that 'Kind' is the only relevant field for equality.
+  EXPECT_EQ(D1 == D2, true);
+}
diff --git a/openmp/tools/omptest/test/unittests/internal-event-test.cpp b/openmp/tools/omptest/test/unittests/internal-event-tostring-test.cpp
index fcd8956..fcd8956 100644
--- a/openmp/tools/omptest/test/unittests/internal-event-test.cpp
+++ b/openmp/tools/omptest/test/unittests/internal-event-tostring-test.cpp
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 936bc12..c7e3aa6 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1502,6 +1502,21 @@ libc_support_library(
 )
 
 libc_support_library(
+    name = "__support_osutil_linux_auxv",
+    hdrs = ["src/__support/OSUtil/linux/auxv.h"],
+    target_compatible_with = select({
+        "@platforms//os:linux": [],
+        "//conditions:default": ["@platforms//:incompatible"],
+    }),
+    deps = [
+        ":__support_common",
+        ":__support_osutil_syscall",
+        ":__support_threads_callonce",
+        ":hdr_fcntl_macros",
+    ],
+)
+
+libc_support_library(
     name = "__support_osutil_vdso",
     hdrs = [
         "src/__support/OSUtil/linux/vdso.h",
@@ -6336,6 +6351,19 @@ libc_function(
     ],
 )
 
+# WARNING: NOT FULLY IMPLEMENTED, FOR TESTING USE ONLY
+libc_function(
+    name = "sysconf",
+    srcs = ["src/unistd/linux/sysconf.cpp"],
+    hdrs = ["src/unistd/sysconf.h"],
+    deps = [
+        ":__support_common",
+        ":__support_osutil_linux_auxv",
+        ":errno",
+        ":hdr_unistd_macros",
+    ],
+)
+
 libc_function(
     name = "write",
     srcs = ["src/unistd/linux/write.cpp"],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/sys/mman/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/sys/mman/BUILD.bazel
index e2c7f7a..6de76e2 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/sys/mman/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/sys/mman/BUILD.bazel
@@ -30,6 +30,7 @@ libc_test(
         "//libc:mmap",
         "//libc:munlock",
         "//libc:munmap",
+        "//libc:sysconf",
     ],
 )
 
@@ -48,6 +49,7 @@ libc_test(
         "//libc:munlock",
         "//libc:munlockall",
         "//libc:munmap",
+        "//libc:sysconf",
     ],
 )
 
@@ -89,6 +91,7 @@ libc_test(
         "//libc:msync",
         "//libc:munlock",
         "//libc:munmap",
+        "//libc:sysconf",
     ],
 )
 
@@ -111,6 +114,7 @@ libc_test(
         "//libc:munmap",
         "//libc:open",
         "//libc:remap_file_pages",
+        "//libc:sysconf",
     ],
 )
 
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 74d632b..ac58e39 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -713,6 +713,19 @@ mlir_c_api_cc_library(
 )
 
 mlir_c_api_cc_library(
+    name = "CAPIShape",
+    srcs = ["lib/CAPI/Dialect/Shape.cpp"],
+    hdrs = ["include/mlir-c/Dialect/Shape.h"],
+    capi_deps = [
+        ":CAPIIR",
+    ],
+    includes = ["include"],
+    deps = [
+        ":ShapeDialect",
+    ],
+)
+
+mlir_c_api_cc_library(
     name = "CAPITarget",
     srcs = ["lib/CAPI/Target/LLVMIR.cpp"],
     hdrs = ["include/mlir-c/Target/LLVMIR.h"],
@@ -734,6 +747,19 @@ mlir_c_api_cc_library(
 )
 
 mlir_c_api_cc_library(
+    name = "CAPITensor",
+    srcs = ["lib/CAPI/Dialect/Tensor.cpp"],
+    hdrs = ["include/mlir-c/Dialect/Tensor.h"],
+    capi_deps = [
+        ":CAPIIR",
+    ],
+    includes = ["include"],
+    deps = [
+        ":TensorDialect",
+    ],
+)
+
+mlir_c_api_cc_library(
     name = "CAPIGPU",
     srcs = [
         "lib/CAPI/Dialect/GPU.cpp",
@@ -14199,6 +14225,7 @@ cc_library(
         ":TransformUtils",
         ":VectorDialect",
         ":XeGPUDialect",
+        ":XeGPUUtils",
         ":XeVMDialect",
         "//llvm:Support",
     ],