73 files changed, 2599 insertions, 581 deletions
diff --git a/clang-tools-extra/clang-tidy/google/CMakeLists.txt b/clang-tools-extra/clang-tidy/google/CMakeLists.txt
index 2470c08..1d4229e 100644
--- a/clang-tools-extra/clang-tidy/google/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/google/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangTidyGoogleModule STATIC
   DefaultArgumentsCheck.cpp
   ExplicitConstructorCheck.cpp
   ExplicitMakePairCheck.cpp
+  FloatTypesCheck.cpp
   FunctionNamingCheck.cpp
   GlobalNamesInHeadersCheck.cpp
   GlobalVariableDeclarationCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/google/FloatTypesCheck.cpp b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.cpp
new file mode 100644
index 0000000..3d5fb02
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.cpp
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FloatTypesCheck.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+
+using namespace ast_matchers;
+
+namespace {
+
+AST_POLYMORPHIC_MATCHER(
+    isValidAndNotInMacro, // begin location must be valid and not a macro ID
+    AST_POLYMORPHIC_SUPPORTED_TYPES(TypeLoc, FloatingLiteral)) {
+  const SourceLocation Begin = Node.getBeginLoc();
+  return !(Begin.isInvalid() || Begin.isMacroID());
+}
+
+// Matches a TypeLoc that, with any qualifiers skipped, is a BuiltinTypeLoc
+// whose builtin kind is BuiltinType::LongDouble.
+AST_MATCHER(TypeLoc, isLongDoubleType) {
+  TypeLoc Unqualified = Node;
+  if (const auto Qualified = Node.getAs<QualifiedTypeLoc>())
+    Unqualified = Qualified.getUnqualifiedLoc();
+
+  const auto Builtin = Unqualified.getAs<BuiltinTypeLoc>();
+  if (!Builtin)
+    return false;
+  const BuiltinType *Ty = Builtin.getTypePtr();
+  return Ty != nullptr && Ty->getKind() == BuiltinType::LongDouble;
+}
+
+// Matches floating literals whose type is the builtin kind LongDouble.
+AST_MATCHER(FloatingLiteral, isLongDoubleLiteral) {
+  const auto *LiteralTy = Node.getType().getTypePtr();
+  const auto *Builtin = dyn_cast_if_present<BuiltinType>(LiteralTy);
+  return Builtin && Builtin->getKind() == BuiltinType::LongDouble;
+}
+
+} // namespace
+
+namespace tidy::google::runtime {
+
+// Registers one matcher for long double TypeLocs and one for literals.
+void RuntimeFloatCheck::registerMatchers(MatchFinder *Finder) {
+  const auto LongDoubleTL = typeLoc(loc(realFloatingPointType()),
+                                    isValidAndNotInMacro(), isLongDoubleType());
+  Finder->addMatcher(LongDoubleTL.bind("longDoubleTypeLoc"), this);
+  const auto LongDoubleLit =
+      floatLiteral(isValidAndNotInMacro(), isLongDoubleLiteral());
+  Finder->addMatcher(LongDoubleLit.bind("longDoubleFloatLiteral"), this);
+}
+
+// Reports the bound TypeLoc and/or floating literal found in the result.
+void RuntimeFloatCheck::check(const MatchFinder::MatchResult &Result) {
+  const auto &Bound = Result.Nodes;
+  if (const auto *Loc = Bound.getNodeAs<TypeLoc>("longDoubleTypeLoc"))
+    diag(Loc->getBeginLoc(), "%0 type is not portable and should not be used")
+        << Loc->getType();
+
+  const auto *Lit = Bound.getNodeAs<FloatingLiteral>("longDoubleFloatLiteral");
+  if (Lit)
+    diag(Lit->getBeginLoc(), "%0 type from literal suffix 'L' is not portable "
+                             "and should not be used")
+        << Lit->getType();
+}
+
+} // namespace tidy::google::runtime
+
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/google/FloatTypesCheck.h b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.h
new file mode 100644
index 0000000..b5534c0
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.h
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FLOATTYPESCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FLOATTYPESCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::google::runtime {
+
+/// Finds uses of `long double`, both as a written type and as the type of a
+/// floating literal, and advises against them due to lack of portability.
+///
+/// For the user-facing documentation see:
+/// https://clang.llvm.org/extra/clang-tidy/checks/google/runtime-float.html
+class RuntimeFloatCheck : public ClangTidyCheck {
+public:
+  RuntimeFloatCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus && !LangOpts.ObjC; // C++ only; ObjC is excluded
+  }
+};
+
+} // namespace clang::tidy::google::runtime
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FLOATTYPESCHECK_H
diff --git a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp
index 5343e2b..eb5666b 100644
--- a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp
@@ -19,6 +19,7 @@
 #include "DefaultArgumentsCheck.h"
 #include "ExplicitConstructorCheck.h"
 #include "ExplicitMakePairCheck.h"
+#include "FloatTypesCheck.h"
 #include "FunctionNamingCheck.h"
 #include "GlobalNamesInHeadersCheck.h"
 #include "GlobalVariableDeclarationCheck.h"
@@ -57,6 +58,8 @@ public:
         "google-objc-function-naming");
     CheckFactories.registerCheck<objc::GlobalVariableDeclarationCheck>(
         "google-objc-global-variable-declaration");
+    CheckFactories.registerCheck<runtime::RuntimeFloatCheck>(
+        "google-runtime-float");
     CheckFactories.registerCheck<runtime::IntegerTypesCheck>(
         "google-runtime-int");
     CheckFactories.registerCheck<runtime::OverloadedUnaryAndCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 28620a92..23d757b 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -155,6 +155,12 @@ New checks
   Finds calls to ``operator[]`` in STL containers and suggests replacing them
   with safe alternatives.
 
+- New :doc:`google-runtime-float
+  <clang-tidy/checks/google/runtime-float>` check.
+
+  Finds uses of ``long double`` and suggests against their use due to lack of
+  portability.
+
 - New :doc:`llvm-mlir-op-builder
   <clang-tidy/checks/llvm/use-new-mlir-op-builder>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/google/runtime-float.rst b/clang-tools-extra/docs/clang-tidy/checks/google/runtime-float.rst
new file mode 100644
index 0000000..4b853ad
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/google/runtime-float.rst
@@ -0,0 +1,10 @@
+.. title:: clang-tidy - google-runtime-float
+
+google-runtime-float
+====================
+
+Finds uses of ``long double`` and suggests against their use due to lack of
+portability.
+
+The corresponding style guide rule:
+https://google.github.io/styleguide/cppguide.html#Floating-Point_Types
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 89ad491..c490d2e 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -238,6 +238,7 @@ Clang-Tidy Checks
    :doc:`google-readability-avoid-underscore-in-googletest-name <google/readability-avoid-underscore-in-googletest-name>`,
    :doc:`google-readability-casting <google/readability-casting>`,
    :doc:`google-readability-todo <google/readability-todo>`,
+   :doc:`google-runtime-float <google/runtime-float>`,
    :doc:`google-runtime-int <google/runtime-int>`,
    :doc:`google-runtime-operator <google/runtime-operator>`,
    :doc:`google-upgrade-googletest-case <google/upgrade-googletest-case>`, "Yes"
diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/runtime-float.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-float.cpp
new file mode 100644
index 0000000..5c9cc11
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-float.cpp
@@ -0,0 +1,40 @@
+// RUN: %check_clang_tidy %s google-runtime-float %t
+
+long double foo;
+// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: 'long double' type is not portable and should not be used [google-runtime-float]
+
+typedef long double MyLongDouble;
+// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: 'long double' type is not portable and should not be used [google-runtime-float]
+
+typedef long double MyOtherLongDouble; // NOLINT
+
+template <typename T>
+void tmpl() { T i; } // declares one local of the template parameter type
+
+long volatile double v = 10;
+// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: 'volatile long double' type is not portable and should not be used [google-runtime-float]
+
+long double h(long const double aaa, long double bbb = 0.5L) {
+  // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: 'long double' type is not portable and should not be used [google-runtime-float]
+  // CHECK-MESSAGES: :[[@LINE-2]]:15: warning: 'const long double' type is not portable and should not be used [google-runtime-float]
+  // CHECK-MESSAGES: :[[@LINE-3]]:38: warning: 'long double' type is not portable and should not be used [google-runtime-float]
+  // CHECK-MESSAGES: :[[@LINE-4]]:56: warning: 'long double' type from literal suffix 'L' is not portable and should not be used [google-runtime-float]
+  double x = 0.1; // unsuffixed literal; no expectation is attached to this line
+  double y = 0.2L;
+  // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: 'long double' type from literal suffix 'L' is not portable and should not be used [google-runtime-float]
+#define ldtype long double
+  ldtype z; // type spelled through the ldtype macro; no expectation attached
+  tmpl<long double>();
+  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: 'long double' type is not portable and should not be used [google-runtime-float]
+  return 0;
+}
+
+struct S{};
+constexpr S operator""_baz(unsigned long long) { // no space: that form is deprecated
+  long double j;
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'long double' type is not portable and should not be used [google-runtime-float]
+  MyOtherLongDouble x; // use of the typedef name; no expectation attached
+  long int a = 1L; // integer literal with an 'L' suffix; no expectation attached
+  return S{};
+}
+
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 10bf96a..1e48fa5 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -954,7 +954,7 @@ def PatchableFunctionEntry
     : InheritableAttr,
       TargetSpecificAttr<TargetArch<
           ["aarch64", "aarch64_be", "loongarch32", "loongarch64", "riscv32",
-           "riscv64", "x86", "x86_64", "ppc", "ppc64"]>> {
+           "riscv64", "x86", "x86_64", "ppc", "ppc64", "ppc64le"]>> {
   let Spellings = [GCC<"patchable_function_entry">];
   let Subjects = SubjectList<[Function, ObjCMethod]>;
   let Args = [UnsignedArgument<"Count">, DefaultIntArgument<"Offset", 0>,
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index dd22e55..ab3f9e4 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -6719,7 +6719,7 @@ if omitted.``Section`` defaults  to the ``-fpatchable-function-entry`` section n
 set, or to ``__patchable_function_entries`` otherwise.
 
 This attribute is only supported on
-aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64/ppc/ppc64 targets.
+aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64/ppc/ppc64/ppc64le targets.
 For ppc/ppc64 targets, AIX is still not supported.
 }];
 }
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a357a88..2371128 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6760,7 +6760,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (!Triple.isAArch64() && !Triple.isLoongArch() && !Triple.isRISCV() &&
         !Triple.isX86() &&
         !(!Triple.isOSAIX() && (Triple.getArch() == llvm::Triple::ppc ||
-                                Triple.getArch() == llvm::Triple::ppc64)))
+                                Triple.getArch() == llvm::Triple::ppc64 ||
+                                Triple.getArch() == llvm::Triple::ppc64le)))
       D.Diag(diag::err_drv_unsupported_opt_for_target)
           << A->getAsString(Args) << TripleStr;
     else if (S.consumeInteger(10, Size) ||
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp
index 27a9113..03eeb99 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp
@@ -232,14 +232,11 @@ public:
         if (!Init)
           return nullptr;
         if (auto *Lambda = dyn_cast<LambdaExpr>(Init)) {
+          DeclRefExprsToIgnore.insert(DRE);
           updateIgnoreList();
           return Lambda;
         }
-        TempExpr = dyn_cast<CXXBindTemporaryExpr>(Init->IgnoreParenCasts());
-        if (!TempExpr)
-          return nullptr;
-        updateIgnoreList();
-        return dyn_cast_or_null<LambdaExpr>(TempExpr->getSubExpr());
+        return nullptr;
       }
 
       void checkCalleeLambda(CallExpr *CE) {
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
index 3079f8e..0b8af0d 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
@@ -448,4 +448,27 @@ void ranges_for_each(RefCountable* obj) {
     obj->method();
     ++(*static_cast<unsigned*>(item));
   });
-}
-\ No newline at end of file
+}
+
+class RefCountedObj { // fixture type declaring ref()/deref() and const helpers
+public:
+  void ref();
+  void deref();
+
+  void call() const;
+  void callLambda([[clang::noescape]] const WTF::Function<void ()>& callback) const; // callback parameter is marked noescape
+  void doSomeWork() const;
+};
+
+void RefCountedObj::callLambda([[clang::noescape]] const WTF::Function<void ()>& callback) const
+{
+    callback(); // invokes the noescape callback directly
+}
+
+void RefCountedObj::call() const
+{
+    auto lambda = [&] { // by-reference capture default; body calls a member function
+        doSomeWork();
+    };
+    callLambda(lambda); // the named lambda is passed to the noescape parameter
+}
diff --git a/clang/test/Driver/fpatchable-function-entry.c b/clang/test/Driver/fpatchable-function-entry.c
index 43be6c5..5248a7c0 100644
--- a/clang/test/Driver/fpatchable-function-entry.c
+++ b/clang/test/Driver/fpatchable-function-entry.c
@@ -8,6 +8,7 @@
 // RUN: %clang --target=riscv64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s
 // RUN: %clang --target=powerpc-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s
 // RUN: %clang --target=powerpc64-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s
+// RUN: %clang --target=powerpc64le-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s
 // CHECK: "-fpatchable-function-entry=1"
 
 // RUN: %clang --target=aarch64 -fsyntax-only %s -fpatchable-function-entry=1,1 -c -### 2>&1 | FileCheck --check-prefix=11 %s
diff --git a/clang/test/Sema/patchable-function-entry-attr.cpp b/clang/test/Sema/patchable-function-entry-attr.cpp
index 7498e67..97b9c26 100644
--- a/clang/test/Sema/patchable-function-entry-attr.cpp
+++ b/clang/test/Sema/patchable-function-entry-attr.cpp
@@ -8,7 +8,7 @@
 // RUN: %clang_cc1 -triple riscv64 -fsyntax-only -verify=silence %s
 // RUN: %clang_cc1 -triple powerpc-unknown-linux-gnu -fsyntax-only -verify=silence %s
 // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -fsyntax-only -verify=silence %s
-// RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify=silence %s
 // RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fsyntax-only -verify=AIX %s
 // RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fsyntax-only -verify=AIX %s
 
diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp
index d3cc6c2..aa75d4e 100644
--- a/flang-rt/lib/cuda/descriptor.cpp
+++ b/flang-rt/lib/cuda/descriptor.cpp
@@ -62,15 +62,6 @@ void RTDEF(CUFDescriptorCheckSection)(
   }
 }
 
-void RTDEF(CUFSetAllocatorIndex)(
-    Descriptor *desc, int index, const char *sourceFile, int sourceLine) {
-  if (!desc) {
-    Terminator terminator{sourceFile, sourceLine};
-    terminator.Crash("descriptor is null");
-  }
-  desc->SetAllocIdx(index);
-}
-
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime::cuda
diff --git a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 4b22e29..f1f931e 100644
--- a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -72,14 +72,3 @@ TEST(AllocatableCUFTest, DescriptorAllocationTest) {
   EXPECT_TRUE(desc != nullptr);
   RTNAME(CUFFreeDescriptor)(desc);
 }
-
-TEST(AllocatableCUFTest, CUFSetAllocatorIndex) {
-  using Fortran::common::TypeCategory;
-  RTNAME(CUFRegisterAllocator)();
-  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
-  auto a{createAllocatable(TypeCategory::Real, 4)};
-  EXPECT_EQ((int)kDefaultAllocator, a->GetAllocIdx());
-  RTNAME(CUFSetAllocatorIndex)(
-      a.get(), kDeviceAllocatorPos, __FILE__, __LINE__);
-  EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
-}
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index 4a831fd..ab9dde8 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -47,10 +47,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
   return kDefaultAllocator;
 }
 
-void initializeDeviceComponentAllocator(
-    Fortran::lower::AbstractConverter &converter,
-    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box);
-
 mlir::Type gatherDeviceComponentCoordinatesAndType(
     fir::FirOpBuilder &builder, mlir::Location loc,
     const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h
index 43dca65..bdeb757 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h
@@ -31,10 +31,6 @@ void genSyncGlobalDescriptor(fir::FirOpBuilder &builder, mlir::Location loc,
 void genDescriptorCheckSection(fir::FirOpBuilder &builder, mlir::Location loc,
                                mlir::Value desc);
 
-/// Generate runtime call to set the allocator index in the descriptor.
-void genSetAllocatorIndex(fir::FirOpBuilder &builder, mlir::Location loc,
-                          mlir::Value desc, mlir::Value index);
-
 } // namespace fir::runtime::cuda
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CUDA_DESCRIPTOR_H_
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 23ab8826..e3873823 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -388,25 +388,4 @@ def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> {
   let hasVerifier = 1;
 }
 
-def cuf_SetAllocatorIndexOp : cuf_Op<"set_allocator_idx", []> {
-  let summary = "Set the allocator index in a descriptor";
-
-  let description = [{
-    Allocator index in the Fortran descriptor is used to retrived the correct
-    CUDA allocator to allocate the memory on the device.
-    In many cases the allocator index is set when the descriptor is created. For
-    device components, the descriptor is part of the derived-type itself and
-    needs to be set after the derived-type is allocated in managed memory.
-  }];
-
-  let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
-      cuf_DataAttributeAttr:$data_attr);
-
-  let assemblyFormat = [{
-    $box `:` qualified(type($box)) attr-dict
-  }];
-
-  let hasVerifier = 1;
-}
-
 #endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index 7555f27..06e4a464 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -41,10 +41,6 @@ void RTDECL(CUFSyncGlobalDescriptor)(
 void RTDECL(CUFDescriptorCheckSection)(
     const Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0);
 
-/// Set the allocator index with the provided value.
-void RTDECL(CUFSetAllocatorIndex)(Descriptor *, int index,
-    const char *sourceFile = nullptr, int sourceLine = 0);
-
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 444b5b6..53239cb 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -450,9 +450,6 @@ private:
     if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare))
       Fortran::lower::attachDeclarePostAllocAction(converter, builder,
                                                    alloc.getSymbol());
-    if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol()))
-      Fortran::lower::initializeDeviceComponentAllocator(
-          converter, alloc.getSymbol(), box);
   }
 
   void setPinnedToFalse() {
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index 1293d2c..bb4bdee 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -17,95 +17,6 @@
 
 #define DEBUG_TYPE "flang-lower-cuda"
 
-void Fortran::lower::initializeDeviceComponentAllocator(
-    Fortran::lower::AbstractConverter &converter,
-    const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) {
-  if (const auto *details{
-          sym.GetUltimate()
-              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
-    const Fortran::semantics::DeclTypeSpec *type{details->type()};
-    const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
-                                                            : nullptr};
-    if (derived) {
-      if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
-        return; // No device components.
-
-      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
-      mlir::Location loc = converter.getCurrentLocation();
-
-      mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType());
-
-      // Only pointer and allocatable needs post allocation initialization
-      // of components descriptors.
-      if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy))
-        return;
-
-      // Extract the derived type.
-      mlir::Type ty = fir::getDerivedType(baseTy);
-      auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
-      assert(recTy && "expected fir::RecordType");
-
-      if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
-        baseTy = boxTy.getEleTy();
-      baseTy = fir::unwrapRefType(baseTy);
-
-      Fortran::semantics::UltimateComponentIterator components{*derived};
-      mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr());
-      mlir::Value addr;
-      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) {
-        mlir::Type idxTy = builder.getIndexType();
-        mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
-        mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
-        llvm::SmallVector<fir::DoLoopOp> loops;
-        llvm::SmallVector<mlir::Value> indices;
-        llvm::SmallVector<mlir::Value> extents;
-        for (unsigned i = 0; i < seqTy.getDimension(); ++i) {
-          mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
-          auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy,
-                                                idxTy, loadedBox, dim);
-          mlir::Value lbub = mlir::arith::AddIOp::create(
-              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1));
-          mlir::Value ext =
-              mlir::arith::SubIOp::create(builder, loc, lbub, one);
-          mlir::Value cmp = mlir::arith::CmpIOp::create(
-              builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero);
-          ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero);
-          extents.push_back(ext);
-
-          auto loop = fir::DoLoopOp::create(
-              builder, loc, dimInfo.getResult(0), dimInfo.getResult(1),
-              dimInfo.getResult(2), /*isUnordered=*/true,
-              /*finalCount=*/false, mlir::ValueRange{});
-          loops.push_back(loop);
-          indices.push_back(loop.getInductionVar());
-          builder.setInsertionPointToStart(loop.getBody());
-        }
-        mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox);
-        auto shape = fir::ShapeOp::create(builder, loc, extents);
-        addr = fir::ArrayCoorOp::create(
-            builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape,
-            /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{});
-      } else {
-        addr = fir::BoxAddrOp::create(builder, loc, loadedBox);
-      }
-      for (const auto &compSym : components) {
-        if (Fortran::semantics::IsDeviceAllocatable(compSym)) {
-          llvm::SmallVector<mlir::Value> coord;
-          mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType(
-              builder, loc, compSym, recTy, coord);
-          assert(coord.size() == 1 && "expect one coordinate");
-          mlir::Value comp = fir::CoordinateOp::create(
-              builder, loc, builder.getRefType(fieldTy), addr, coord[0]);
-          cuf::DataAttributeAttr dataAttr =
-              Fortran::lower::translateSymbolCUFDataAttribute(
-                  builder.getContext(), compSym);
-          cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
-        }
-      }
-    }
-  }
-}
-
 mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType(
     fir::FirOpBuilder &builder, mlir::Location loc,
     const Fortran::semantics::Symbol &sym, fir::RecordType recTy,
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index c79c9b1..b1cd14c 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
   return res;
 }
 
-/// Device allocatable components in a derived-type don't have the correct
-/// allocator index in their descriptor when they are created. After
-/// initialization, cuf.set_allocator_idx operations are inserted to set the
-/// correct allocator index for each device component.
-static void
-initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
-                                   const Fortran::semantics::Symbol &symbol,
-                                   Fortran::lower::SymMap &symMap) {
-  if (const auto *details{
-          symbol.GetUltimate()
-              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
-    const Fortran::semantics::DeclTypeSpec *type{details->type()};
-    const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived()
-                                                            : nullptr};
-    if (derived) {
-      if (!FindCUDADeviceAllocatableUltimateComponent(*derived))
-        return; // No device components.
-
-      fir::FirOpBuilder &builder = converter.getFirOpBuilder();
-      mlir::Location loc = converter.getCurrentLocation();
-
-      fir::ExtendedValue exv =
-          converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap);
-      mlir::Type baseTy = fir::unwrapRefType(fir::getBase(exv).getType());
-      if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy))
-        baseTy = boxTy.getEleTy();
-      baseTy = fir::unwrapRefType(baseTy);
-
-      if (fir::isAllocatableType(fir::getBase(exv).getType()) ||
-          fir::isPointerType(fir::getBase(exv).getType()))
-        return; // Allocator index need to be set after allocation.
-
-      auto recTy =
-          mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
-      assert(recTy && "expected fir::RecordType");
-
-      Fortran::semantics::UltimateComponentIterator components{*derived};
-      for (const auto &sym : components) {
-        if (Fortran::semantics::IsDeviceAllocatable(sym)) {
-          llvm::SmallVector<mlir::Value> coord;
-          mlir::Type fieldTy =
-              Fortran::lower::gatherDeviceComponentCoordinatesAndType(
-                  builder, loc, sym, recTy, coord);
-          mlir::Value base = fir::getBase(exv);
-          mlir::Value comp = fir::CoordinateOp::create(
-              builder, loc, builder.getRefType(fieldTy), base, coord);
-          cuf::DataAttributeAttr dataAttr =
-              Fortran::lower::translateSymbolCUFDataAttribute(
-                  builder.getContext(), sym);
-          cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr);
-        }
-      }
-    }
-  }
-}
-
 /// Must \p var be default initialized at runtime when entering its scope.
 static bool
 mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
@@ -1164,9 +1108,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
   if (mustBeDefaultInitializedAtRuntime(var))
     Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
                                                symMap);
-  if (converter.getFoldingContext().languageFeatures().IsEnabled(
-          Fortran::common::LanguageFeature::CUDA))
-    initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap);
   auto *builder = &converter.getFirOpBuilder();
   if (needCUDAAlloc(var.getSymbol()) &&
       !cuf::isCUDADeviceContext(builder->getRegion())) {
@@ -1426,9 +1367,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
   if (mustBeDefaultInitializedAtRuntime(var))
     Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
                                                symMap);
-  if (converter.getFoldingContext().languageFeatures().IsEnabled(
-          Fortran::common::LanguageFeature::CUDA))
-    initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap);
 }
 
 //===--------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
index a6ee986..37e4c5a 100644
--- a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp
@@ -47,18 +47,3 @@ void fir::runtime::cuda::genDescriptorCheckSection(fir::FirOpBuilder &builder,
       builder, loc, fTy, desc, sourceFile, sourceLine)};
   fir::CallOp::create(builder, loc, func, args);
 }
-
-void fir::runtime::cuda::genSetAllocatorIndex(fir::FirOpBuilder &builder,
-                                              mlir::Location loc,
-                                              mlir::Value desc,
-                                              mlir::Value index) {
-  mlir::func::FuncOp func =
-      fir::runtime::getRuntimeFunc<mkRTKey(CUFSetAllocatorIndex)>(loc, builder);
-  auto fTy = func.getFunctionType();
-  mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
-  mlir::Value sourceLine =
-      fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
-  llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
-      builder, loc, fTy, desc, index, sourceFile, sourceLine)};
-  fir::CallOp::create(builder, loc, func, args);
-}
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index ade8071..687007d 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -345,17 +345,6 @@ llvm::LogicalResult cuf::StreamCastOp::verify() {
   return checkStreamType(*this);
 }
 
-//===----------------------------------------------------------------------===//
-// SetAllocatorOp
-//===----------------------------------------------------------------------===//
-
-llvm::LogicalResult cuf::SetAllocatorIndexOp::verify() {
-  if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
-    return emitOpError(
-        "expect box to be a reference to class or box type value");
-  return mlir::success();
-}
-
 // Tablegen operators
 
 #define GET_OP_CLASSES
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 9834b04..4fe83d4 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -928,34 +928,6 @@ struct CUFSyncDescriptorOpConversion
   }
 };
 
-struct CUFSetAllocatorIndexOpConversion
-    : public mlir::OpRewritePattern<cuf::SetAllocatorIndexOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  mlir::LogicalResult
-  matchAndRewrite(cuf::SetAllocatorIndexOp op,
-                  mlir::PatternRewriter &rewriter) const override {
-    auto mod = op->getParentOfType<mlir::ModuleOp>();
-    fir::FirOpBuilder builder(rewriter, mod);
-    mlir::Location loc = op.getLoc();
-    int idx = kDefaultAllocator;
-    if (op.getDataAttr() == cuf::DataAttribute::Device) {
-      idx = kDeviceAllocatorPos;
-    } else if (op.getDataAttr() == cuf::DataAttribute::Managed) {
-      idx = kManagedAllocatorPos;
-    } else if (op.getDataAttr() == cuf::DataAttribute::Unified) {
-      idx = kUnifiedAllocatorPos;
-    } else if (op.getDataAttr() == cuf::DataAttribute::Pinned) {
-      idx = kPinnedAllocatorPos;
-    }
-    mlir::Value index =
-        builder.createIntegerConstant(loc, builder.getI32Type(), idx);
-    fir::runtime::cuda::genSetAllocatorIndex(builder, loc, op.getBox(), index);
-    op.erase();
-    return mlir::success();
-  }
-};
-
 class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
 public:
   void runOnOperation() override {
@@ -1017,8 +989,8 @@ void cuf::populateCUFToFIRConversionPatterns(
     const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) {
   patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter);
   patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion,
-                  CUFFreeOpConversion, CUFSyncDescriptorOpConversion,
-                  CUFSetAllocatorIndexOpConversion>(patterns.getContext());
+                  CUFFreeOpConversion, CUFSyncDescriptorOpConversion>(
+      patterns.getContext());
   patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab,
                                                &dl, &converter);
   patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>(
diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir
index 8b6e7d6..31f2ed0 100644
--- a/flang/test/Fir/CUDA/cuda-alloc-free.fir
+++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir
@@ -94,19 +94,4 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} {
 // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64
 // CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
 
-
-func.func @_QQsetalloc() {
-  %0 = cuf.alloc !fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QFEd1"} -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>
-  %1 = fir.coordinate_of %0, a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-  cuf.set_allocator_idx %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>}
-  return
-}
-
-// CHECK-LABEL:   func.func @_QQsetalloc() {
-// CHECK: %[[DT:.*]] = fir.call @_FortranACUFMemAlloc
-// CHECK: %[[CONV:.*]] = fir.convert %[[DT]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>
-// CHECK: %[[COMP:.*]] = fir.coordinate_of %[[CONV]], a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-// CHECK: %[[DESC:.*]] = fir.convert %[[COMP]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
-// CHECK: fir.call @_FortranACUFSetAllocatorIndex(%[[DESC]], %c2{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
-
 } // end module
diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf
deleted file mode 100644
index d783f34..0000000
--- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf
+++ /dev/null
@@ -1,66 +0,0 @@
-! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
-
-module m1
-  type ty_device
-    integer, device, allocatable, dimension(:) :: x
-    integer :: y
-    integer, device, allocatable, dimension(:) :: z
-  end type
-contains
-  subroutine sub1()
-    type(ty_device) :: a
-  end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub1()
-! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} -> !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[DT:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>)
-! CHECK: fir.address_of(@_QQ_QMm1Tty_device.DerivedInit)
-! CHECK: fir.copy 
-! CHECK: %[[X:.*]] = fir.coordinate_of %[[DT]]#0, x : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[X]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[Z:.*]] = fir.coordinate_of %[[DT]]#0, z : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[Z]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-
-  subroutine sub2()
-    type(ty_device), pointer :: d1
-    allocate(d1)
-  end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub2()
-! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: cuf.allocate
-! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-
-  subroutine sub3()
-    type(ty_device), allocatable :: d1
-    allocate(d1)
-  end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub3()
-! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>)
-! CHECK: cuf.allocate
-! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>
-! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>}
-
-  subroutine sub4()
-    type(ty_device), allocatable :: d1(:,:)
-    allocate(d1(10, 10))
-  end subroutine
-
-! CHECK-LABEL: func.func @_QMm1Psub4()
-! CHECK: cuf.allocate
-! CHECK-COUNT-2: fir.do_loop
-! CHECK-COUNT-2: cuf.set_allocator_idx
-
-end module
diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index 449455c..0cc74fb 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -37,15 +37,15 @@ using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT);
 
 #if defined(LIBC_TARGET_CPU_HAS_AVX512F)
 template <typename T>
-inline constexpr size_t native_vector_size = 64 / sizeof(T);
+LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T);
 #elif defined(LIBC_TARGET_CPU_HAS_AVX2)
 template <typename T>
-inline constexpr size_t native_vector_size = 32 / sizeof(T);
+LIBC_INLINE_VAR constexpr size_t native_vector_size = 32 / sizeof(T);
 #elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
 template <typename T>
-inline constexpr size_t native_vector_size = 16 / sizeof(T);
+LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T);
 #else
-template <typename T> inline constexpr size_t native_vector_size = 1;
+template <typename T> LIBC_INLINE_VAR constexpr size_t native_vector_size = 1;
 #endif
 
 template <typename T> LIBC_INLINE constexpr T poison() {
@@ -90,122 +90,127 @@ using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, T>;
 
 // Casting.
 template <typename To, typename From, size_t N>
-LIBC_INLINE constexpr simd<To, N> simd_cast(simd<From, N> v) {
+LIBC_INLINE constexpr static simd<To, N> simd_cast(simd<From, N> v) {
   return __builtin_convertvector(v, simd<To, N>);
 }
 
 // SIMD mask operations.
-template <size_t N> LIBC_INLINE constexpr bool all_of(simd<bool, N> m) {
+template <size_t N> LIBC_INLINE constexpr static bool all_of(simd<bool, N> m) {
   return __builtin_reduce_and(m);
 }
-template <size_t N> LIBC_INLINE constexpr bool any_of(simd<bool, N> m) {
+template <size_t N> LIBC_INLINE constexpr static bool any_of(simd<bool, N> m) {
   return __builtin_reduce_or(m);
 }
-template <size_t N> LIBC_INLINE constexpr bool none_of(simd<bool, N> m) {
+template <size_t N> LIBC_INLINE constexpr static bool none_of(simd<bool, N> m) {
   return !any_of(m);
 }
-template <size_t N> LIBC_INLINE constexpr bool some_of(simd<bool, N> m) {
+template <size_t N> LIBC_INLINE constexpr static bool some_of(simd<bool, N> m) {
   return any_of(m) && !all_of(m);
 }
-template <size_t N> LIBC_INLINE constexpr int popcount(simd<bool, N> m) {
+template <size_t N> LIBC_INLINE constexpr static int popcount(simd<bool, N> m) {
   return __builtin_popcountg(m);
 }
-template <size_t N> LIBC_INLINE constexpr int find_first_set(simd<bool, N> m) {
+template <size_t N>
+LIBC_INLINE constexpr static int find_first_set(simd<bool, N> m) {
   return __builtin_ctzg(m);
 }
-template <size_t N> LIBC_INLINE constexpr int find_last_set(simd<bool, N> m) {
+template <size_t N>
+LIBC_INLINE constexpr static int find_last_set(simd<bool, N> m) {
   constexpr size_t size = simd_size_v<simd<bool, N>>;
   return size - __builtin_clzg(m);
 }
 
 // Elementwise operations.
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> min(simd<T, N> x, simd<T, N> y) {
+LIBC_INLINE constexpr static simd<T, N> min(simd<T, N> x, simd<T, N> y) {
   return __builtin_elementwise_min(x, y);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> max(simd<T, N> x, simd<T, N> y) {
+LIBC_INLINE constexpr static simd<T, N> max(simd<T, N> x, simd<T, N> y) {
   return __builtin_elementwise_max(x, y);
 }
 
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> abs(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> abs(simd<T, N> x) {
   return __builtin_elementwise_abs(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> fma(simd<T, N> x, simd<T, N> y, simd<T, N> z) {
+LIBC_INLINE constexpr static simd<T, N> fma(simd<T, N> x, simd<T, N> y,
+                                            simd<T, N> z) {
   return __builtin_elementwise_fma(x, y, z);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> ceil(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> ceil(simd<T, N> x) {
   return __builtin_elementwise_ceil(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> floor(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> floor(simd<T, N> x) {
   return __builtin_elementwise_floor(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> roundeven(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> roundeven(simd<T, N> x) {
   return __builtin_elementwise_roundeven(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> round(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> round(simd<T, N> x) {
   return __builtin_elementwise_round(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> trunc(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> trunc(simd<T, N> x) {
   return __builtin_elementwise_trunc(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> nearbyint(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> nearbyint(simd<T, N> x) {
   return __builtin_elementwise_nearbyint(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> rint(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> rint(simd<T, N> x) {
   return __builtin_elementwise_rint(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> canonicalize(simd<T, N> x) {
+LIBC_INLINE constexpr static simd<T, N> canonicalize(simd<T, N> x) {
   return __builtin_elementwise_canonicalize(x);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> copysign(simd<T, N> x, simd<T, N> y) {
+LIBC_INLINE constexpr static simd<T, N> copysign(simd<T, N> x, simd<T, N> y) {
   return __builtin_elementwise_copysign(x, y);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> fmod(simd<T, N> x, simd<T, N> y) {
+LIBC_INLINE constexpr static simd<T, N> fmod(simd<T, N> x, simd<T, N> y) {
   return __builtin_elementwise_fmod(x, y);
 }
 
 // Reduction operations.
 template <typename T, size_t N, typename Op = cpp::plus<>>
-LIBC_INLINE constexpr T reduce(simd<T, N> v, Op op = {}) {
+LIBC_INLINE constexpr static T reduce(simd<T, N> v, Op op = {}) {
   return reduce(v, op);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::plus<>) {
+LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::plus<>) {
   return __builtin_reduce_add(v);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::multiplies<>) {
+LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::multiplies<>) {
   return __builtin_reduce_mul(v);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_and<>) {
+LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_and<>) {
   return __builtin_reduce_and(v);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_or<>) {
+LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_or<>) {
   return __builtin_reduce_or(v);
 }
 template <typename T, size_t N>
-LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_xor<>) {
+LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_xor<>) {
   return __builtin_reduce_xor(v);
 }
-template <typename T, size_t N> LIBC_INLINE constexpr T hmin(simd<T, N> v) {
+template <typename T, size_t N>
+LIBC_INLINE constexpr static T hmin(simd<T, N> v) {
   return __builtin_reduce_min(v);
 }
-template <typename T, size_t N> LIBC_INLINE constexpr T hmax(simd<T, N> v) {
+template <typename T, size_t N>
+LIBC_INLINE constexpr static T hmax(simd<T, N> v) {
   return __builtin_reduce_max(v);
 }
 
@@ -242,28 +247,29 @@ LIBC_INLINE enable_if_simd_t<T> masked_store(simd<bool, simd_size_v<T>> m, T v,
 }
 
 // Construction helpers.
-template <typename T, size_t N> LIBC_INLINE constexpr simd<T, N> splat(T v) {
+template <typename T, size_t N>
+LIBC_INLINE constexpr static simd<T, N> splat(T v) {
   return simd<T, N>(v);
 }
-template <typename T> LIBC_INLINE constexpr simd<T> splat(T v) {
+template <typename T> LIBC_INLINE constexpr static simd<T> splat(T v) {
   return splat<T, simd_size_v<simd<T>>>(v);
 }
 template <typename T, unsigned N>
-LIBC_INLINE constexpr simd<T, N> iota(T base = T(0), T step = T(1)) {
+LIBC_INLINE constexpr static simd<T, N> iota(T base = T(0), T step = T(1)) {
   simd<T, N> v{};
   for (unsigned i = 0; i < N; ++i)
     v[i] = base + T(i) * step;
   return v;
 }
 template <typename T>
-LIBC_INLINE constexpr simd<T> iota(T base = T(0), T step = T(1)) {
+LIBC_INLINE constexpr static simd<T> iota(T base = T(0), T step = T(1)) {
   return iota<T, simd_size_v<simd<T>>>(base, step);
 }
 
 // Conditional helpers.
 template <typename T, size_t N>
-LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x,
-                                        simd<T, N> y) {
+LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x,
+                                               simd<T, N> y) {
   return m ? x : y;
 }
 
diff --git a/lldb/include/lldb/Core/Architecture.h b/lldb/include/lldb/Core/Architecture.h
index b6fc1a2..ed64a89 100644
--- a/lldb/include/lldb/Core/Architecture.h
+++ b/lldb/include/lldb/Core/Architecture.h
@@ -12,6 +12,7 @@
 #include "lldb/Core/PluginInterface.h"
 #include "lldb/Target/DynamicRegisterInfo.h"
 #include "lldb/Target/MemoryTagManager.h"
+#include "lldb/Target/RegisterContextUnwind.h"
 
 namespace lldb_private {
 
@@ -129,6 +130,14 @@ public:
                                        RegisterContext &reg_context) const {
     return false;
   }
+
+  /// Return an UnwindPlan that allows architecture-defined rules for finding
+  /// saved registers, given a particular set of register values.
+  virtual lldb::UnwindPlanSP GetArchitectureUnwindPlan(
+      lldb_private::Thread &thread, lldb_private::RegisterContextUnwind *regctx,
+      std::shared_ptr<const UnwindPlan> current_unwindplan) {
+    return lldb::UnwindPlanSP();
+  }
 };
 
 } // namespace lldb_private
diff --git a/lldb/include/lldb/Target/RegisterContextUnwind.h b/lldb/include/lldb/Target/RegisterContextUnwind.h
index b10a3648..52c28fd 100644
--- a/lldb/include/lldb/Target/RegisterContextUnwind.h
+++ b/lldb/include/lldb/Target/RegisterContextUnwind.h
@@ -21,6 +21,7 @@
 namespace lldb_private {
 
 class UnwindLLDB;
+class ArchitectureArm;
 
 class RegisterContextUnwind : public lldb_private::RegisterContext {
 public:
@@ -72,6 +73,25 @@ public:
   // above asynchronous trap handlers (sigtramp) for instance.
   bool BehavesLikeZerothFrame() const override;
 
+protected:
+  // Provide a location for where THIS function saved the CALLER's register
+  // value, or where a frame "below" this one saved it. That is, this function
+  // might not modify the register, but a callee may have saved it to the stack.
+  //
+  // The ConcreteRegisterLocation type may be set to eRegisterNotAvailable --
+  // this will happen for a volatile register being queried mid-stack.  Instead
+  // of floating frame 0's contents of that register up the stack (which may or
+  // may not be the value of that reg when the function was executing), we won't
+  // return any value.
+  //
+  // If a non-volatile register (a "preserved" register, a callee-preserved
+  // register) is requested mid-stack, and no frames "below" the requested stack
+  // have saved the register anywhere, it is safe to assume that frame 0's
+  // register value is the same.
+  lldb_private::UnwindLLDB::RegisterSearchResult SavedLocationForRegister(
+      uint32_t lldb_regnum,
+      lldb_private::UnwindLLDB::ConcreteRegisterLocation &regloc);
+
 private:
   enum FrameType {
     eNormalFrame,
@@ -86,6 +106,8 @@ private:
 
   // UnwindLLDB needs to pass around references to ConcreteRegisterLocations
   friend class UnwindLLDB;
+  // Architecture may need to retrieve caller register values from this frame
+  friend class ArchitectureArm;
 
   // Returns true if we have an unwind loop -- the same stack frame unwinding
   // multiple times.
@@ -130,27 +152,6 @@ private:
   void PropagateTrapHandlerFlagFromUnwindPlan(
       std::shared_ptr<const UnwindPlan> unwind_plan);
 
-  // Provide a location for where THIS function saved the CALLER's register
-  // value
-  // Or a frame "below" this one saved it, i.e. a function called by this one,
-  // preserved a register that this
-  // function didn't modify/use.
-  //
-  // The ConcreteRegisterLocation type may be set to eRegisterNotAvailable --
-  // this will happen for a volatile register being queried mid-stack.  Instead
-  // of floating frame 0's contents of that register up the stack (which may or
-  // may not be the value of that reg when the function was executing), we won't
-  // return any value.
-  //
-  // If a non-volatile register (a "preserved" register) is requested mid-stack
-  // and no frames "below" the requested
-  // stack have saved the register anywhere, it is safe to assume that frame 0's
-  // register values are still the same
-  // as the requesting frame's.
-  lldb_private::UnwindLLDB::RegisterSearchResult SavedLocationForRegister(
-      uint32_t lldb_regnum,
-      lldb_private::UnwindLLDB::ConcreteRegisterLocation &regloc);
-
   std::optional<UnwindPlan::Row::AbstractRegisterLocation>
   GetAbstractRegisterLocation(uint32_t lldb_regnum, lldb::RegisterKind &kind);
 
@@ -202,6 +203,8 @@ private:
 
   std::shared_ptr<const UnwindPlan> GetFullUnwindPlanForFrame();
 
+  lldb::UnwindPlanSP TryAdoptArchitectureUnwindPlan();
+
   void UnwindLogMsg(const char *fmt, ...) __attribute__((format(printf, 2, 3)));
 
   void UnwindLogMsgVerbose(const char *fmt, ...)
diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h
index 368ec51d..cdd6a6f 100644
--- a/lldb/include/lldb/Target/StopInfo.h
+++ b/lldb/include/lldb/Target/StopInfo.h
@@ -97,6 +97,12 @@ public:
   /// and silently continue again one more time.
   virtual bool WasContinueInterrupted(Thread &thread) { return false; }
 
+  virtual uint32_t GetStopReasonDataCount() const { return 0; }
+  virtual uint64_t GetStopReasonDataAtIndex(uint32_t idx) {
+    // Handle all the common cases that have no data.
+    return 0;
+  }
+
   // Sometimes the thread plan logic will know that it wants a given stop to
   // stop or not, regardless of what the ordinary logic for that StopInfo would
   // dictate.  The main example of this is the ThreadPlanCallFunction, which
diff --git a/lldb/include/lldb/Target/UnwindLLDB.h b/lldb/include/lldb/Target/UnwindLLDB.h
index 88180b3..29b3ab9c 100644
--- a/lldb/include/lldb/Target/UnwindLLDB.h
+++ b/lldb/include/lldb/Target/UnwindLLDB.h
@@ -22,6 +22,7 @@
 namespace lldb_private {
 
 class RegisterContextUnwind;
+class ArchitectureArm;
 
 class UnwindLLDB : public lldb_private::Unwind {
 public:
@@ -37,6 +38,7 @@ public:
 
 protected:
   friend class lldb_private::RegisterContextUnwind;
+  friend class lldb_private::ArchitectureArm;
 
   /// An UnwindPlan::Row::AbstractRegisterLocation, combined with the register
   /// context and memory for a specific stop point, is used to create a
diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp
index ec68b2a..4e4aa48 100644
--- a/lldb/source/API/SBThread.cpp
+++ b/lldb/source/API/SBThread.cpp
@@ -157,52 +157,8 @@ size_t SBThread::GetStopReasonDataCount() {
   if (exe_ctx) {
     if (exe_ctx->HasThreadScope()) {
       StopInfoSP stop_info_sp = exe_ctx->GetThreadPtr()->GetStopInfo();
-      if (stop_info_sp) {
-        StopReason reason = stop_info_sp->GetStopReason();
-        switch (reason) {
-        case eStopReasonInvalid:
-        case eStopReasonNone:
-        case eStopReasonTrace:
-        case eStopReasonExec:
-        case eStopReasonPlanComplete:
-        case eStopReasonThreadExiting:
-        case eStopReasonInstrumentation:
-        case eStopReasonProcessorTrace:
-        case eStopReasonVForkDone:
-        case eStopReasonHistoryBoundary:
-          // There is no data for these stop reasons.
-          return 0;
-
-        case eStopReasonBreakpoint: {
-          break_id_t site_id = stop_info_sp->GetValue();
-          lldb::BreakpointSiteSP bp_site_sp(
-              exe_ctx->GetProcessPtr()->GetBreakpointSiteList().FindByID(
-                  site_id));
-          if (bp_site_sp)
-            return bp_site_sp->GetNumberOfConstituents() * 2;
-          else
-            return 0; // Breakpoint must have cleared itself...
-        } break;
-
-        case eStopReasonWatchpoint:
-          return 1;
-
-        case eStopReasonSignal:
-          return 1;
-
-        case eStopReasonInterrupt:
-          return 1;
-
-        case eStopReasonException:
-          return 1;
-
-        case eStopReasonFork:
-          return 1;
-
-        case eStopReasonVFork:
-          return 1;
-        }
-      }
+      if (stop_info_sp)
+        return stop_info_sp->GetStopReasonDataCount();
     }
   } else {
     LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}");
@@ -220,63 +176,8 @@ uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) {
     if (exe_ctx->HasThreadScope()) {
       Thread *thread = exe_ctx->GetThreadPtr();
       StopInfoSP stop_info_sp = thread->GetStopInfo();
-      if (stop_info_sp) {
-        StopReason reason = stop_info_sp->GetStopReason();
-        switch (reason) {
-        case eStopReasonInvalid:
-        case eStopReasonNone:
-        case eStopReasonTrace:
-        case eStopReasonExec:
-        case eStopReasonPlanComplete:
-        case eStopReasonThreadExiting:
-        case eStopReasonInstrumentation:
-        case eStopReasonProcessorTrace:
-        case eStopReasonVForkDone:
-        case eStopReasonHistoryBoundary:
-          // There is no data for these stop reasons.
-          return 0;
-
-        case eStopReasonBreakpoint: {
-          break_id_t site_id = stop_info_sp->GetValue();
-          lldb::BreakpointSiteSP bp_site_sp(
-              exe_ctx->GetProcessPtr()->GetBreakpointSiteList().FindByID(
-                  site_id));
-          if (bp_site_sp) {
-            uint32_t bp_index = idx / 2;
-            BreakpointLocationSP bp_loc_sp(
-                bp_site_sp->GetConstituentAtIndex(bp_index));
-            if (bp_loc_sp) {
-              if (idx & 1) {
-                // Odd idx, return the breakpoint location ID
-                return bp_loc_sp->GetID();
-              } else {
-                // Even idx, return the breakpoint ID
-                return bp_loc_sp->GetBreakpoint().GetID();
-              }
-            }
-          }
-          return LLDB_INVALID_BREAK_ID;
-        } break;
-
-        case eStopReasonWatchpoint:
-          return stop_info_sp->GetValue();
-
-        case eStopReasonSignal:
-          return stop_info_sp->GetValue();
-
-        case eStopReasonInterrupt:
-          return stop_info_sp->GetValue();
-
-        case eStopReasonException:
-          return stop_info_sp->GetValue();
-
-        case eStopReasonFork:
-          return stop_info_sp->GetValue();
-
-        case eStopReasonVFork:
-          return stop_info_sp->GetValue();
-        }
-      }
+      if (stop_info_sp)
+        return stop_info_sp->GetStopReasonDataAtIndex(idx);
     }
   } else {
     LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}");
diff --git a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
index 2bcb2c0..bb0c4ba 100644
--- a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
@@ -1921,6 +1921,13 @@ UnwindPlanSP ABISysV_arm::CreateFunctionEntryUnwindPlan() {
 
 UnwindPlanSP ABISysV_arm::CreateDefaultUnwindPlan() {
   // TODO: Handle thumb
+  // If we had a Target argument, could at least check
+  // target.GetArchitecture().GetTriple().isArmMClass()
+  // which is always thumb.
+  // To handle thumb properly, we'd need to fetch the current
+  // CPSR state at unwind time to tell if the processor is
+  // in thumb mode in this stack frame.  There's no way to
+  // express something like that in an UnwindPlan today.
   uint32_t fp_reg_num = dwarf_r11;
   uint32_t pc_reg_num = dwarf_pc;
 
diff --git a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp
index 81c7212..721c4bc 100644
--- a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp
+++ b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp
@@ -9,10 +9,18 @@
 #include "Plugins/Architecture/Arm/ArchitectureArm.h"
 #include "Plugins/Process/Utility/ARMDefines.h"
 #include "Plugins/Process/Utility/InstructionUtils.h"
+#include "Utility/ARM_DWARF_Registers.h"
 #include "lldb/Core/PluginManager.h"
+#include "lldb/Symbol/UnwindPlan.h"
+#include "lldb/Target/Process.h"
 #include "lldb/Target/RegisterContext.h"
+#include "lldb/Target/RegisterNumber.h"
 #include "lldb/Target/Thread.h"
+#include "lldb/Target/UnwindLLDB.h"
 #include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/RegisterValue.h"
 
 using namespace lldb_private;
 using namespace lldb;
@@ -150,3 +158,181 @@ addr_t ArchitectureArm::GetOpcodeLoadAddress(addr_t opcode_addr,
   }
   return opcode_addr & ~(1ull);
 }
+
+// The ARM M-Profile Armv7-M Architecture Reference Manual,
+// subsection "B1.5 Armv7-M exception model", see the parts
+// describing "Exception entry behavior" and "Exception
+// return behavior".
+// When an exception happens on this processor, certain registers are
+// saved below the stack pointer, the stack pointer is decremented,
+// a special value is put in the link register to indicate the
+// exception has been taken, and an exception handler function
+// is invoked.
+//
+// Detect that special value in $lr, and if present, add
+// unwind rules for the registers that were saved above this
+// stack frame's CFA.  Overwrite any register locations that
+// the current_unwindplan has for these registers; they are
+// not correct when we're invoked this way.
+UnwindPlanSP ArchitectureArm::GetArchitectureUnwindPlan(
+    Thread &thread, RegisterContextUnwind *regctx,
+    std::shared_ptr<const UnwindPlan> current_unwindplan) {
+  // regctx and current_unwindplan are dereferenced below; bail without them.
+  ProcessSP process_sp = thread.GetProcess();
+  if (!regctx || !current_unwindplan || !process_sp)
+    return {};
+
+  const ArchSpec arch = process_sp->GetTarget().GetArchitecture();
+  if (!arch.GetTriple().isArmMClass() || arch.GetAddressByteSize() != 4)
+    return {};
+
+  // Get the caller's LR value from regctx (the LR value
+  // at function entry to this function).
+  RegisterNumber ra_regnum(thread, eRegisterKindGeneric,
+                           LLDB_REGNUM_GENERIC_RA);
+  uint32_t ra_regnum_lldb = ra_regnum.GetAsKind(eRegisterKindLLDB);
+  if (ra_regnum_lldb == LLDB_INVALID_REGNUM)
+    return {};
+
+  UnwindLLDB::ConcreteRegisterLocation regloc = {};
+  bool got_concrete_location = false;
+  if (regctx->SavedLocationForRegister(ra_regnum_lldb, regloc) ==
+      UnwindLLDB::RegisterSearchResult::eRegisterFound) {
+    got_concrete_location = true;
+  } else {
+    RegisterNumber pc_regnum(thread, eRegisterKindGeneric,
+                             LLDB_REGNUM_GENERIC_PC);
+    uint32_t pc_regnum_lldb = pc_regnum.GetAsKind(eRegisterKindLLDB);
+    if (regctx->SavedLocationForRegister(pc_regnum_lldb, regloc) ==
+        UnwindLLDB::RegisterSearchResult::eRegisterFound)
+      got_concrete_location = true;
+  }
+
+  if (!got_concrete_location)
+    return {};
+
+  addr_t callers_return_address = LLDB_INVALID_ADDRESS;
+  const RegisterInfo *reg_info = regctx->GetRegisterInfoAtIndex(ra_regnum_lldb);
+  if (reg_info) {
+    RegisterValue reg_value;
+    if (regctx->ReadRegisterValueFromRegisterLocation(regloc, reg_info,
+                                                      reg_value))
+      callers_return_address = reg_value.GetAsUInt32();
+  }
+
+  if (callers_return_address == LLDB_INVALID_ADDRESS)
+    return {};
+
+  // ARMv7-M ARM says that the LR will be set to
+  // one of these values when an exception has taken
+  // place:
+  //    if HaveFPExt() then
+  //      if CurrentMode==Mode_Handler then
+  //        LR = Ones(27):NOT(CONTROL.FPCA):'0001';
+  //      else
+  //        LR = Ones(27):NOT(CONTROL.FPCA):'1':CONTROL.SPSEL:'01';
+  //    else
+  //      if CurrentMode==Mode_Handler then
+  //        LR = Ones(28):'0001';
+  //      else
+  //        LR = Ones(29):CONTROL.SPSEL:'01';
+
+  // Top 27 bits are set for an exception return.
+  const uint32_t exception_return = -1U & ~0b11111U;
+  // Bit4 is 1 if only GPRs were saved.
+  const uint32_t gprs_only = 0b10000;
+  // Bit<1:0> are '01'; test both bits so an all-ones LR is rejected.
+  const uint32_t lowbits_mask = 0b11;
+  const uint32_t lowbits = 0b01;
+
+  if ((callers_return_address & exception_return) != exception_return ||
+      (callers_return_address & lowbits_mask) != lowbits)
+    return {};
+
+  const bool fp_regs_saved = !(callers_return_address & gprs_only);
+
+  const RegisterKind plan_regkind = current_unwindplan->GetRegisterKind();
+  UnwindPlanSP new_plan = std::make_shared<UnwindPlan>(plan_regkind);
+  new_plan->SetSourceName("Arm Cortex-M exception return UnwindPlan");
+  new_plan->SetSourcedFromCompiler(eLazyBoolNo);
+  new_plan->SetUnwindPlanValidAtAllInstructions(eLazyBoolYes);
+  new_plan->SetUnwindPlanForSignalTrap(eLazyBoolYes);
+
+  int stored_regs_size = fp_regs_saved ? 0x68 : 0x20;
+  uint32_t gpr_regs[] = {dwarf_r0,  dwarf_r1, dwarf_r2, dwarf_r3,
+                         dwarf_r12, dwarf_lr, dwarf_pc, dwarf_cpsr};
+  uint32_t fpr_regs[] = {dwarf_s0,  dwarf_s1,  dwarf_s2,  dwarf_s3,
+                         dwarf_s4,  dwarf_s5,  dwarf_s6,  dwarf_s7,
+                         dwarf_s8,  dwarf_s9,  dwarf_s10, dwarf_s11,
+                         dwarf_s12, dwarf_s13, dwarf_s14, dwarf_s15};
+
+  RegisterContextSP reg_ctx_sp = thread.GetRegisterContext();
+  if (!reg_ctx_sp)
+    return {};
+
+  std::vector<uint32_t> saved_regs;
+  for (uint32_t dwarf_regno : gpr_regs) {
+    uint32_t regno = dwarf_regno;
+    reg_ctx_sp->ConvertBetweenRegisterKinds(eRegisterKindDWARF, dwarf_regno,
+                                            plan_regkind, regno);
+    saved_regs.push_back(regno);
+  }
+  if (fp_regs_saved) {
+    for (uint32_t dwarf_regno : fpr_regs) {
+      uint32_t regno = dwarf_regno;
+      reg_ctx_sp->ConvertBetweenRegisterKinds(eRegisterKindDWARF, dwarf_regno,
+                                              plan_regkind, regno);
+      saved_regs.push_back(regno);
+    }
+  }
+
+  addr_t cfa;
+  if (!regctx->GetCFA(cfa))
+    return {};
+
+  // The CPSR value saved to stack is actually (from Armv7-M ARM)
+  //   "XPSR<31:10>:frameptralign:XPSR<8:0>"
+  // Bit 9 indicates that the stack pointer was aligned (to
+  // an 8-byte alignment) when the exception happened, and we must
+  // account for that when restoring the original stack pointer value.
+  Status error;
+  uint32_t callers_xPSR =
+      process_sp->ReadUnsignedIntegerFromMemory(cfa + 0x1c, 4, 0, error);
+  // If the saved xPSR cannot be read, leave the current plan untouched.
+  if (error.Fail())
+    return {};
+  const bool align_stack = callers_xPSR & (1U << 9);
+  uint32_t callers_sp = cfa + stored_regs_size;
+  if (align_stack)
+    callers_sp |= 4;
+
+  Log *log = GetLog(LLDBLog::Unwind);
+  LLDB_LOGF(log,
+            "ArchitectureArm::GetArchitectureUnwindPlan found caller return "
+            "addr of 0x%" PRIx64 ", for frame with CFA 0x%" PRIx64
+            ", fp_regs_saved %d, stored_regs_size 0x%x, align stack %d",
+            callers_return_address, cfa, fp_regs_saved, stored_regs_size,
+            align_stack);
+
+  uint32_t sp_regnum = dwarf_sp;
+  reg_ctx_sp->ConvertBetweenRegisterKinds(eRegisterKindDWARF, dwarf_sp,
+                                          plan_regkind, sp_regnum);
+
+  const int row_count = current_unwindplan->GetRowCount();
+  for (int i = 0; i < row_count; i++) {
+    UnwindPlan::Row row = *current_unwindplan->GetRowAtIndex(i);
+    uint32_t offset = 0;
+    for (uint32_t regno : saved_regs) {
+      // The locations could be set with
+      // SetRegisterLocationToIsConstant(regno, cfa+offset)
+      // expressing it in terms of CFA addr+offset - this UnwindPlan
+      // is only used once, with this specific CFA.  I'm not sure
+      // which will be clearer for someone reading the unwind log.
+      row.SetRegisterLocationToAtCFAPlusOffset(regno, offset, true);
+      offset += 4;
+    }
+    row.SetRegisterLocationToIsCFAPlusOffset(sp_regnum, callers_sp - cfa, true);
+    new_plan->AppendRow(row);
+  }
+  return new_plan;
+}
diff --git a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h
index f579d6b..52277dc 100644
--- a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h
+++ b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h
@@ -10,6 +10,7 @@
 #define LLDB_SOURCE_PLUGINS_ARCHITECTURE_ARM_ARCHITECTUREARM_H
 
 #include "lldb/Core/Architecture.h"
+#include "lldb/Target/Thread.h"
 
 namespace lldb_private {
 
@@ -29,6 +30,10 @@ public:
   lldb::addr_t GetOpcodeLoadAddress(lldb::addr_t load_addr,
                                     AddressClass addr_class) const override;
 
+  lldb::UnwindPlanSP GetArchitectureUnwindPlan(
+      lldb_private::Thread &thread, lldb_private::RegisterContextUnwind *regctx,
+      std::shared_ptr<const UnwindPlan> current_unwindplan) override;
+
 private:
   static std::unique_ptr<Architecture> Create(const ArchSpec &arch);
   ArchitectureArm() = default;
diff --git a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp
index cb8ba05..69885aa 100644
--- a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp
+++ b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp
@@ -12,6 +12,7 @@
 #include "lldb/Core/PluginManager.h"
 #include "lldb/Core/Section.h"
 #include "lldb/Symbol/Symbol.h"
+#include "lldb/Target/Target.h"
 #include "lldb/Utility/LLDBLog.h"
 #include "lldb/Utility/Log.h"
 #include "llvm/ADT/DenseSet.h"
@@ -233,6 +234,40 @@ void ObjectFileJSON::CreateSections(SectionList &unified_section_list) {
   }
 }
 
+bool ObjectFileJSON::SetLoadAddress(Target &target, lldb::addr_t value,
+                                    bool value_is_offset) {
+  // With no section list there is nothing to load; report success.
+  if (!m_sections_up)
+    return true;
+
+  // A load address is converted to a slide from the lowest file address.
+  addr_t slide = value;
+  if (!value_is_offset) {
+    addr_t base_file_addr = LLDB_INVALID_ADDRESS;
+    for (const SectionSP &sect : *m_sections_up)
+      base_file_addr = std::min(base_file_addr, sect->GetFileAddress());
+    if (base_file_addr == LLDB_INVALID_ADDRESS)
+      return false;
+    slide = value - base_file_addr;
+  }
+
+  // Apply the slide to every section that has a valid file address.
+  Log *log = GetLog(LLDBLog::DynamicLoader);
+  for (const SectionSP &sect : *m_sections_up) {
+    const addr_t file_addr = sect->GetFileAddress();
+    if (file_addr == LLDB_INVALID_ADDRESS)
+      continue;
+    const addr_t load_addr = file_addr + slide;
+    LLDB_LOGF(
+        log,
+        "ObjectFileJSON::SetLoadAddress section %s to load addr 0x%" PRIx64,
+        sect->GetName().AsCString(), load_addr);
+    target.SetSectionLoadAddress(sect, load_addr, /*warn_multiple=*/true);
+  }
+
+  return true;
+}
+
 bool ObjectFileJSON::MagicBytesMatch(DataBufferSP data_sp,
                                      lldb::addr_t data_offset,
                                      lldb::addr_t data_length) {
diff --git a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h
index b72565f..029c8ff 100644
--- a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h
+++ b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h
@@ -86,6 +86,9 @@ public:
 
   Strata CalculateStrata() override { return eStrataUser; }
 
+  bool SetLoadAddress(Target &target, lldb::addr_t value,
+                      bool value_is_offset) override;
+
   static bool MagicBytesMatch(lldb::DataBufferSP data_sp, lldb::addr_t offset,
                               lldb::addr_t length);
 
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
index 6037c8d..a780b3f 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
@@ -799,6 +799,23 @@ Status ProcessMachCore::DoGetMemoryRegionInfo(addr_t load_addr,
       region_info.SetMapped(MemoryRegionInfo::eNo);
     }
     return Status();
+  } else {
+    // The corefile has no LC_SEGMENT at this virtual address,
+    // but see if there is a binary whose Section has been
+    // loaded at that address in the current Target.
+    Address addr;
+    if (GetTarget().ResolveLoadAddress(load_addr, addr)) {
+      SectionSP section_sp(addr.GetSection());
+      if (section_sp) {
+        region_info.GetRange().SetRangeBase(
+            section_sp->GetLoadBaseAddress(&GetTarget()));
+        region_info.GetRange().SetByteSize(section_sp->GetByteSize());
+        if (region_info.GetRange().Contains(load_addr)) {
+          region_info.SetLLDBPermissions(section_sp->GetPermissions());
+          return Status();
+        }
+      }
+    }
   }
 
   region_info.GetRange().SetRangeBase(load_addr);
diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp
index bcf1297..787eb94 100644
--- a/lldb/source/Target/RegisterContextUnwind.cpp
+++ b/lldb/source/Target/RegisterContextUnwind.cpp
@@ -293,6 +293,9 @@ void RegisterContextUnwind::InitializeZerothFrame() {
     return;
   }
 
+  // Give the Architecture a chance to replace the UnwindPlan.
+  TryAdoptArchitectureUnwindPlan();
+
   UnwindLogMsg("initialized frame current pc is 0x%" PRIx64 " cfa is 0x%" PRIx64
                " afa is 0x%" PRIx64 " using %s UnwindPlan",
                (uint64_t)m_current_pc.GetLoadAddress(exe_ctx.GetTargetPtr()),
@@ -482,6 +485,9 @@ void RegisterContextUnwind::InitializeNonZerothFrame() {
         }
       }
 
+      // Give the Architecture a chance to replace the UnwindPlan.
+      TryAdoptArchitectureUnwindPlan();
+
       UnwindLogMsg("initialized frame cfa is 0x%" PRIx64 " afa is 0x%" PRIx64,
                    (uint64_t)m_cfa, (uint64_t)m_afa);
       return;
@@ -686,6 +692,9 @@ void RegisterContextUnwind::InitializeNonZerothFrame() {
     }
   }
 
+  // Give the Architecture a chance to replace the UnwindPlan.
+  TryAdoptArchitectureUnwindPlan();
+
   UnwindLogMsg("initialized frame current pc is 0x%" PRIx64
                " cfa is 0x%" PRIx64 " afa is 0x%" PRIx64,
                (uint64_t)m_current_pc.GetLoadAddress(exe_ctx.GetTargetPtr()),
@@ -1717,6 +1726,41 @@ RegisterContextUnwind::SavedLocationForRegister(
   return UnwindLLDB::RegisterSearchResult::eRegisterNotFound;
 }
 
+UnwindPlanSP RegisterContextUnwind::TryAdoptArchitectureUnwindPlan() {
+  ProcessSP process_sp = m_thread.GetProcess();
+  if (!m_full_unwind_plan_sp || !process_sp)
+    return {};
+
+  // Ask the Architecture plugin, if there is one, for a replacement plan.
+  UnwindPlanSP arch_override_plan_sp;
+  if (Architecture *arch = process_sp->GetTarget().GetArchitecturePlugin())
+    arch_override_plan_sp =
+        arch->GetArchitectureUnwindPlan(m_thread, this, m_full_unwind_plan_sp);
+  if (!arch_override_plan_sp)
+    return {};
+
+  // Adopt the replacement as the full unwind plan.
+  m_full_unwind_plan_sp = arch_override_plan_sp;
+  PropagateTrapHandlerFlagFromUnwindPlan(m_full_unwind_plan_sp);
+  m_registers.clear();
+  if (GetLog(LLDBLog::Unwind)) {
+    UnwindLogMsg(
+        "Replacing Full Unwindplan with Architecture UnwindPlan, '%s'",
+        m_full_unwind_plan_sp->GetSourceName().AsCString());
+    const UnwindPlan::Row *active_row =
+        m_full_unwind_plan_sp->GetRowForFunctionOffset(m_current_offset);
+    if (active_row) {
+      StreamString active_row_strm;
+      active_row->Dump(active_row_strm, m_full_unwind_plan_sp.get(), &m_thread,
+                       m_start_pc.GetLoadAddress(&process_sp->GetTarget()));
+      UnwindLogMsg("%s", active_row_strm.GetData());
+    }
+  }
+
+  // Report the adopted plan so callers can tell a replacement happened.
+  return arch_override_plan_sp;
+}
+
 // TryFallbackUnwindPlan() -- this method is a little tricky.
 //
 // When this is called, the frame above -- the caller frame, the "previous"
diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp
index ddf8c62..f47dae2 100644
--- a/lldb/source/Target/StopInfo.cpp
+++ b/lldb/source/Target/StopInfo.cpp
@@ -108,8 +108,7 @@ public:
   void StoreBPInfo() {
     ThreadSP thread_sp(m_thread_wp.lock());
     if (thread_sp) {
-      BreakpointSiteSP bp_site_sp(
-          thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value));
+      BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP();
       if (bp_site_sp) {
         uint32_t num_constituents = bp_site_sp->GetNumberOfConstituents();
         if (num_constituents == 1) {
@@ -139,8 +138,7 @@ public:
   bool IsValidForOperatingSystemThread(Thread &thread) override {
     ProcessSP process_sp(thread.GetProcess());
     if (process_sp) {
-      BreakpointSiteSP bp_site_sp(
-          process_sp->GetBreakpointSiteList().FindByID(m_value));
+      BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP();
       if (bp_site_sp)
         return bp_site_sp->ValidForThisThread(thread);
     }
@@ -154,8 +152,7 @@ public:
     if (thread_sp) {
       if (!m_should_stop_is_valid) {
         // Only check once if we should stop at a breakpoint
-        BreakpointSiteSP bp_site_sp(
-            thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value));
+        BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP();
         if (bp_site_sp) {
           ExecutionContext exe_ctx(thread_sp->GetStackFrameAtIndex(0));
           StoppointCallbackContext context(event_ptr, exe_ctx, true);
@@ -186,8 +183,7 @@ public:
     if (m_description.empty()) {
       ThreadSP thread_sp(m_thread_wp.lock());
       if (thread_sp) {
-        BreakpointSiteSP bp_site_sp(
-            thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value));
+        BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP();
         if (bp_site_sp) {
           StreamString strm;
           // If we have just hit an internal breakpoint, and it has a kind
@@ -247,6 +243,35 @@ public:
     return m_description.c_str();
   }
 
+  uint32_t GetStopReasonDataCount() const override {
+    // Two data words (breakpoint ID, location ID) per constituent.
+    if (lldb::BreakpointSiteSP site_sp = GetBreakpointSiteSP())
+      return 2 * site_sp->GetNumberOfConstituents();
+    return 0; // Breakpoint must have cleared itself...
+  }
+
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    // Data is laid out as (breakpoint ID, location ID) pairs, one pair per
+    // constituent of the breakpoint site.
+    lldb::BreakpointSiteSP site_sp = GetBreakpointSiteSP();
+    if (!site_sp)
+      return LLDB_INVALID_BREAK_ID;
+
+    BreakpointLocationSP loc_sp(site_sp->GetConstituentAtIndex(idx / 2));
+    if (!loc_sp)
+      return LLDB_INVALID_BREAK_ID;
+
+    // Even idx, return the breakpoint ID
+    if ((idx & 1) == 0)
+      return loc_sp->GetBreakpoint().GetID();
+
+    // FIXME: This might be a Facade breakpoint, so we need to fetch
+    // the one that the thread actually hit, not the native loc ID.
+
+    // Odd idx, return the breakpoint location ID
+    return loc_sp->GetID();
+  }
+
   std::optional<uint32_t>
   GetSuggestedStackFrameIndex(bool inlined_stack) override {
     if (!inlined_stack)
@@ -255,8 +280,7 @@ public:
     ThreadSP thread_sp(m_thread_wp.lock());
     if (!thread_sp)
       return {};
-    BreakpointSiteSP bp_site_sp(
-        thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value));
+    BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP();
     if (!bp_site_sp)
       return {};
 
@@ -297,8 +321,7 @@ protected:
         return;
       }
 
-      BreakpointSiteSP bp_site_sp(
-          thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value));
+      BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP();
       std::unordered_set<break_id_t> precondition_breakpoints;
       // Breakpoints that fail their condition check are not considered to
       // have been hit.  If the only locations at this site have failed their
@@ -629,6 +652,20 @@ protected:
   }
 
 private:
+  // Looks up the breakpoint site whose ID is held in m_value.  The result
+  // is an empty pointer when m_value is LLDB_INVALID_BREAK_ID or when the
+  // thread or its process cannot be obtained.
+  BreakpointSiteSP GetBreakpointSiteSP() const {
+    BreakpointSiteSP site_sp;
+    if (m_value != LLDB_INVALID_BREAK_ID) {
+      if (ThreadSP thread_sp = GetThread()) {
+        if (ProcessSP process_sp = thread_sp->GetProcess())
+          site_sp = process_sp->GetBreakpointSiteList().FindByID(m_value);
+      }
+    }
+    return site_sp;
+  }
+
   bool m_should_stop;
   bool m_should_stop_is_valid;
   bool m_should_perform_action; // Since we are trying to preserve the "state"
@@ -699,6 +736,13 @@ public:
 
   StopReason GetStopReason() const override { return eStopReasonWatchpoint; }
 
+  // The only stop reason datum is the StopInfo value, at index 0.
+  uint32_t GetStopReasonDataCount() const override { return 1; }
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    // Out-of-range indexes yield 0.
+    return idx == 0 ? GetValue() : 0;
+  }
+
   const char *GetDescription() override {
     if (m_description.empty()) {
       StreamString strm;
@@ -1139,6 +1183,13 @@ public:
 
   bool ShouldSelect() const override { return IsShouldStopSignal(); }
 
+  // The only stop reason datum is the StopInfo value, at index 0.
+  uint32_t GetStopReasonDataCount() const override { return 1; }
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    // Out-of-range indexes yield 0.
+    return idx == 0 ? GetValue() : 0;
+  }
+
 private:
   // In siginfo_t terms, if m_value is si_signo, m_code is si_code.
   std::optional<int> m_code;
@@ -1171,6 +1222,14 @@ public:
     }
     return m_description.c_str();
   }
+
+  // Exactly one stop reason datum is exposed: the StopInfo value at index 0.
+  uint32_t GetStopReasonDataCount() const override { return 1; }
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    if (idx != 0)
+      return 0; // Out of range.
+    return GetValue();
+  }
 };
 
 // StopInfoTrace
@@ -1249,6 +1308,13 @@ public:
     else
       return m_description.c_str();
   }
+  // Exactly one stop reason datum is exposed: the StopInfo value at index 0.
+  uint32_t GetStopReasonDataCount() const override { return 1; }
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    if (idx != 0)
+      return 0; // Out of range.
+    return GetValue();
+  }
 };
 
 // StopInfoProcessorTrace
@@ -1390,6 +1456,14 @@ public:
 
   const char *GetDescription() override { return "fork"; }
 
+  // Exactly one stop reason datum is exposed: the StopInfo value at index 0.
+  uint32_t GetStopReasonDataCount() const override { return 1; }
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    if (idx != 0)
+      return 0; // Out of range.
+    return GetValue();
+  }
+
 protected:
   void PerformAction(Event *event_ptr) override {
     // Only perform the action once
@@ -1424,6 +1498,13 @@ public:
 
   const char *GetDescription() override { return "vfork"; }
 
+  // The only stop reason datum is the StopInfo value, at index 0.
+  uint32_t GetStopReasonDataCount() const override { return 1; }
+  uint64_t GetStopReasonDataAtIndex(uint32_t idx) override {
+    // Out-of-range indexes yield 0.
+    return idx == 0 ? GetValue() : 0;
+  }
+
 protected:
   void PerformAction(Event *event_ptr) override {
     // Only perform the action once
diff --git a/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py
index a488276..ed028a1 100644
--- a/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py
@@ -11,7 +11,6 @@ class TestCase(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py
index 38b8508..0fb6e88 100644
--- a/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py
@@ -11,7 +11,6 @@ class TestBasicDeque(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py
index 85eaa8f..e631a87 100644
--- a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py
@@ -12,7 +12,6 @@ class TestDbgInfoContentDeque(TestBase):
     @skipIf(compiler=no_match("clang"))
     @skipIf(compiler="clang", compiler_version=["<", "18.0"])
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py
index a3a409d..1d0f9ccf 100644
--- a/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py
@@ -11,7 +11,6 @@ class TestDbgInfoContentForwardList(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py
index c9f4a15..a6ba081 100644
--- a/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py
@@ -11,7 +11,6 @@ class TestBasicForwardList(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py
index 5c82ac3..370c367 100644
--- a/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py
@@ -11,7 +11,6 @@ class TestCase(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py
index 0ecc244..b26bd7d 100644
--- a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py
@@ -13,7 +13,6 @@ class TestDbgInfoContentList(TestBase):
     @skipIf(compiler=no_match("clang"))
     @skipIf(compiler="clang", compiler_version=["<", "12.0"])
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py
index f29f353..6253a35 100644
--- a/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py
@@ -11,7 +11,6 @@ class TestBasicList(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py b/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py
index 5e0ab48..cc91ddc 100644
--- a/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py
+++ b/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py
@@ -12,7 +12,6 @@ class TestCase(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         """
         This test is creating ValueObjects with both C++ module and debug
diff --git a/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py
index 50419b6..5bfdb9b 100644
--- a/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py
@@ -11,7 +11,6 @@ class TestSharedPtrDbgInfoContent(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py
index 9f04361..da86466 100644
--- a/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py
@@ -10,9 +10,8 @@ from lldbsuite.test import lldbutil
 class TestSharedPtr(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
-    @skipIf(compiler="clang", compiler_version=["<", "17.0"])
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
+    @skipIf(compiler="clang", compiler_version=["<", "17.0"])
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
index ba4df40..1c32222 100644
--- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
@@ -14,7 +14,6 @@ class TestDbgInfoContentVector(TestBase):
     @skipIf(compiler="clang", compiler_version=["<", "12.0"])
     @skipIf(macos_version=["<", "14.0"])
     @skipIfDarwin  # https://github.com/llvm/llvm-project/issues/106475
-    @skipIfLinux
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
index 6fa9dd5..2cddce0 100644
--- a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py
@@ -11,7 +11,6 @@ class TestVectorOfVectors(TestBase):
     @add_test_categories(["libc++"])
     @skipIf(compiler=no_match("clang"))
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py
index 19d24c1..28edf19 100644
--- a/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py
@@ -12,7 +12,6 @@ class TestDbgInfoContentWeakPtr(TestBase):
     @skipIf(compiler=no_match("clang"))
     @skipIf(compiler="clang", compiler_version=["<", "17.0"])
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py
index e3cc9b9..f0a0a46 100644
--- a/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py
@@ -12,7 +12,6 @@ class TestSharedPtr(TestBase):
     @skipIf(compiler=no_match("clang"))
     @skipIf(compiler="clang", compiler_version=["<", "17.0"])
     @skipIf(macos_version=["<", "15.0"])
-    @skipUnlessDarwin
     def test(self):
         self.build()
 
diff --git a/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py b/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py
index 020a226..497b8e8 100644
--- a/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py
+++ b/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py
@@ -37,7 +37,7 @@ class GCoreTestCase(TestBase):
         for thread in process:
             reason = thread.GetStopReason()
             self.assertStopReason(reason, lldb.eStopReasonSignal)
-            signal = thread.GetStopReasonDataAtIndex(1)
+            signal = thread.GetStopReasonDataAtIndex(0)
             # Check we got signal 19 (SIGSTOP)
             self.assertEqual(signal, 19)
 
diff --git a/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py b/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py
index 4a848d1..6d9aef2 100644
--- a/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py
+++ b/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py
@@ -91,7 +91,7 @@ class LinuxCoreThreadsTestCase(TestBase):
             reason = thread.GetStopReason()
             if thread.GetThreadID() == tid:
                 self.assertStopReason(reason, lldb.eStopReasonSignal)
-                signal = thread.GetStopReasonDataAtIndex(1)
+                signal = thread.GetStopReasonDataAtIndex(0)
                 # Check we got signal 4 (SIGILL)
                 self.assertEqual(signal, 4)
             else:
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/Makefile b/lldb/test/API/functionalities/unwind/cortex-m-exception/Makefile
new file mode 100644
index 0000000..22f1051
--- /dev/null
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/Makefile
@@ -0,0 +1 @@
+include Makefile.rules
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
new file mode 100644
index 0000000..267f8c8
--- /dev/null
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py
@@ -0,0 +1,56 @@
+"""
+Test that we can backtrace up an ARM Cortex-M Exception return stack
+"""
+
+import lldb
+import json
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestCortexMExceptionUnwind(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    @skipUnlessDarwin  # on the lldb-remote-linux-ubuntu CI, only get 1 stack frame not 6
+    def test_no_fpu(self):
+        """Test that we can backtrace correctly through an ARM Cortex-M Exception return stack"""
+
+        target = self.dbg.CreateTarget("")
+        exe = "binary.json"
+        with open(exe) as f:
+            exe_json = json.load(f)
+            exe_uuid = exe_json["uuid"]
+
+        target.AddModule(exe, "", exe_uuid)
+        self.assertTrue(target.IsValid())
+
+        core = self.getBuildArtifact("core")
+        self.yaml2macho_core("armv7m-nofpu-exception.yaml", core, exe_uuid)
+
+        process = target.LoadCore(core)
+        self.assertTrue(process.IsValid())
+
+        if self.TraceOn():
+            self.runCmd("target list")
+            self.runCmd("image list")
+            self.runCmd("target modules dump sections")
+            self.runCmd("target modules dump symtab")
+            self.runCmd("bt")
+
+        thread = process.GetThreadAtIndex(0)
+        self.assertTrue(thread.IsValid())
+
+        # The backtrace has 6 frames: frames 0-3 are named
+        # (checked below) and frames 4-5 are unnamed.  Those
+        # last two frames are not interesting for this test,
+        # so binary.json has no symbols for them.
+        self.assertEqual(thread.GetNumFrames(), 6)
+        stackframe_names = [
+            "exception_catcher",
+            "exception_catcher",
+            "exception_thrower",
+            "main",
+        ]
+        for i, name in enumerate(stackframe_names):
+            self.assertEqual(name, thread.GetFrameAtIndex(i).GetSymbol().GetName())
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml b/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml
new file mode 100644
index 0000000..9ce5ff4
--- /dev/null
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml
@@ -0,0 +1,64 @@
+cpu: armv7m
+threads:
+  - regsets:
+      - flavor: gpr
+        registers: [{name: sp, value: 0x2000fe70}, {name: r7, value: 0x2000fe80}, 
+                    {name: pc, value: 0x0020392c}, {name: lr, value: 0x0020392d}]
+memory-regions:
+  # stack memory fetched via 
+  # (lldb) p/x $sp
+  # (lldb) x/128wx $sp
+  # % pbpaste | sed -e 's,.*: ,,' -e 's/ /, /g' -e 's/$/,/'
+  - addr: 0x2000fe70
+    UInt32: [
+      0x0000002a, 0x20010e58, 0x00203923, 0x00000001,
+      0x2000fe88, 0x00203911, 0x2000ffdc, 0xfffffff9,
+      0x00000102, 0x00000002, 0x000003f0, 0x0000002a,
+      0x20012620, 0x00203215, 0x00203366, 0x81000200,
+      0x00203215, 0x200128b0, 0x0024928d, 0x2000fecc,
+      0x002491ed, 0x20010e58, 0x20010e4c, 0x2000ffa0,
+      0x200107a0, 0x0000003c, 0x200116e8, 0x200108b0,
+      0x0020b895, 0x00000000, 0x0000e200, 0x2001227d,
+      0x200121fd, 0x0000e000, 0x00000000, 0x200129a0,
+      0x002035bf, 0x00000029, 0x000003d8, 0x20011120,
+      0x200116e0, 0x40003800, 0x20011120, 0x00000000,
+      0x00205169, 0x00203713, 0x00000000, 0x0022dcb9,
+      0x40003800, 0x20011240, 0x00000000, 0xf7d71ecf,
+      0xfc7676d6, 0x00000000, 0x968782d3, 0xe75afbbb,
+      0x600d77c8, 0xc1c05886, 0x17f3e76d, 0xefc3054d,
+      0x11940aaa, 0x00000000, 0x93bffabb, 0x6db85af0,
+      0x00000000, 0x2001d76f, 0xcb35f653, 0x00000000,
+      0x00000000, 0x079d5058, 0x00000000, 0x00000000,
+      0xc5622949, 0x68682572, 0x00000075, 0x0000e500,
+      0x20012c30, 0x00000000, 0xcdfcd8c2, 0x76efc90f,
+      0x0024495f, 0x20012bf0, 0x0000e400, 0x00000000,
+      0x00000000, 0x00000000, 0x00000000, 0x00000000,
+      0x0029089c, 0x0029089c, 0x00000000, 0x2000ffe4,
+      0x00202a87, 0x2000ffec, 0x00200257, 0x2000fff4,
+      0x00200211, 0x00000000, 0x00000000, 0x7badb3f6,
+      0x20010794, 0x20010fac, 0x200109b0, 0x002887a4,
+      0x00285688, 0x002854c8, 0x00288f74, 0x0028a618,
+      0x0028a6f8, 0x00000000, 0x00000001, 0x00000000,
+      0x00000000, 0x00000000, 0x002037dd, 0x00000000,
+      0x00000002, 0x00000100, 0x00000000, 0x20010064,
+      0x00000000, 0x00000000, 0x00000000, 0x200109c0,
+      0x00000000, 0x00000000, 0x00000000, 0x00000000
+    ]
+  # exception_catcher() function bytes
+  # (lldb) dis
+  # binary`exception_catcher:
+  #     0x203910 <+0>:  push   {r3, r4, r5, r6, r7, lr}
+  #     0x203912 <+2>:  add    r7, sp, #0x10
+  #   ...
+  # (lldb) x/44bx 0x203910
+  # % pbpaste | sed -e 's,.*: ,,' -e 's/ /, /g' -e 's/$/,/'
+  - addr: 0x203910
+    UInt8: [
+      0xf8, 0xb5, 0x04, 0xaf, 0x06, 0x4c, 0x07, 0x49,
+      0x74, 0xf0, 0x2e, 0xf8, 0x01, 0xac, 0x74, 0xf0,
+      0x61, 0xf8, 0x05, 0x48, 0x76, 0xf0, 0xdf, 0xfe,
+      0x74, 0xf0, 0x0b, 0xf9, 0xfe, 0xe7, 0x00, 0xbf,
+      0x4c, 0x0e, 0x01, 0x20, 0x0d, 0x35, 0x20, 0x00,
+      0x98, 0xae, 0x28, 0x00
+    ]
+
diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json b/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json
new file mode 100644
index 0000000..8fcd530
--- /dev/null
+++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json
@@ -0,0 +1,41 @@
+{
+  "triple": "armv7m-apple",
+  "uuid": "2D157DBA-53C9-3AC7-B5A1-9D336EC831CB",
+  "type": "executable",
+  "sections": [
+    {
+      "user_id": 100,
+      "name": "TEXT",
+      "type": "code",
+      "address": 2097664,
+      "size": 598872,
+      "file_offset": 0,
+      "file_size": 598872,
+      "alignment": 2,
+      "flags": 514,
+      "read": true,
+      "write": false,
+      "execute": true
+    }
+  ],
+  "symbols": [
+    {
+      "name": "main",
+      "type": "code",
+      "size": 10,
+      "address": 2108030
+    },
+    {
+      "name": "exception_catcher",
+      "type": "code",
+      "size": 44,
+      "address": 2111760
+    },
+    {
+      "name": "exception_thrower",
+      "type": "code",
+      "size": 2652,
+      "address": 2108040
+    }
+  ]
+}
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 673c4f7..75e8fe4 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -521,7 +521,8 @@ bool CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
   // Only VP intrinsics can have an %evl parameter.
   Value *OldMaskParam = VPI.getMaskParam();
   if (!OldMaskParam) {
-    assert(VPI.getIntrinsicID() == Intrinsic::vp_merge &&
+    assert((VPI.getIntrinsicID() == Intrinsic::vp_merge ||
+            VPI.getIntrinsicID() == Intrinsic::vp_select) &&
            "Unexpected VP intrinsic without mask operand");
     OldMaskParam = VPI.getArgOperand(0);
   }
@@ -537,7 +538,8 @@ bool CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
   ElementCount ElemCount = VPI.getStaticVectorLength();
   Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
   Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
-  if (VPI.getIntrinsicID() == Intrinsic::vp_merge)
+  if (VPI.getIntrinsicID() == Intrinsic::vp_merge ||
+      VPI.getIntrinsicID() == Intrinsic::vp_select)
     VPI.setArgOperand(0, NewMaskParam);
   else
     VPI.setMaskParam(NewMaskParam);
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index ce35a5b..9245db4 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -1218,6 +1218,14 @@ Pattern::MatchResult Pattern::match(StringRef Buffer,
     StringRef MatchedValue = MatchInfo[CaptureParenGroup];
     ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat();
     APInt Value = Format.valueFromStringRepr(MatchedValue, SM);
+    // Numeric variables are already inserted into GlobalNumericVariableTable
+    // during parsing, but clearLocalVars might remove them, so we must
+    // reinsert them. Numeric-variable resolution does not access
+    // GlobalNumericVariableTable; it directly uses a pointer to the variable.
+    // However, other functions (such as clearLocalVars) may require active
+    // variables to be in the table.
+    Context->GlobalNumericVariableTable.try_emplace(NumericVariableDef.getKey(),
+                                                    DefinedNumericVariable);
     DefinedNumericVariable->setValue(Value, MatchedValue);
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 2ab2c14..023fd14 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -920,10 +920,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
     assert(!Subtarget->isAIXABI() &&
            "AIX does not support patchable function entry!");
-    // PATCHABLE_FUNCTION_ENTER on little endian is for XRAY support which is
-    // handled in PPCLinuxAsmPrinter.
-    if (MAI->isLittleEndian())
-      return;
     const Function &F = MF->getFunction();
     unsigned Num = 0;
     (void)F.getFnAttribute("patchable-function-entry")
@@ -1789,7 +1785,13 @@ void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
     // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
     // of instructions change.
     // XRAY is only supported on PPC Linux little endian.
-    if (!MAI->isLittleEndian())
+    const Function &F = MF->getFunction();
+    unsigned Num = 0;
+    (void)F.getFnAttribute("patchable-function-entry")
+        .getValueAsString()
+        .getAsInteger(10, Num);
+
+    if (!MAI->isLittleEndian() || Num)
       break;
     MCSymbol *BeginOfSled = OutContext.createTempSymbol();
     MCSymbol *EndOfSled = OutContext.createTempSymbol();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 10b2f5d..ac5e8d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1270,11 +1270,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
     return;
   }
 
-  VPInstruction *OpVPI;
-  if (match(Def, m_ExtractLastElement(m_VPInstruction(OpVPI))) &&
-      OpVPI->isVectorToScalar()) {
-    Def->replaceAllUsesWith(OpVPI);
-    return;
+  if (match(Def,
+            m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) &&
+      vputils::isSingleScalar(A) && all_of(A->users(), [Def, A](VPUser *U) {
+        return U->usesScalars(A) || Def == U;
+      })) {
+    return Def->replaceAllUsesWith(A);
   }
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll b/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll
index 0c2d282..f597754 100644
--- a/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll
+++ b/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=powerpc %s -o - | FileCheck %s --check-prefixes=CHECK,PPC32
 ; RUN: llc -mtriple=powerpc64 %s -o - | FileCheck %s --check-prefixes=CHECK,PPC64
+; RUN: llc -mtriple=powerpc64le %s -o - | FileCheck %s --check-prefix=PPC64LE
 
 @a = global i32 0, align 4
 
@@ -9,6 +10,12 @@ define void @f0() {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    blr
 ; CHECK-NOT:   .section    __patchable_function_entries
+;
+; PPC64LE-LABEL: f0:
+; PPC64LE-NOT:   nop
+; PPC64LE:       # %bb.0:
+; PPC64LE-NEXT:  blr
+; PPC64LE-NOT:   .section    __patchable_function_entries
   ret void
 }
 
@@ -18,6 +25,22 @@ define void @f1() "patchable-function-entry"="0" {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    blr
 ; CHECK-NOT:   .section    __patchable_function_entries
+;
+; PPC64LE-LABEL: f1:
+; PPC64LE:        # %bb.0:
+; PPC64LE-NEXT:   .Ltmp0:
+; PPC64LE-NEXT:   b .Ltmp1
+; PPC64LE-NEXT:   nop
+; PPC64LE-NEXT:   std 0, -8(1)
+; PPC64LE-NEXT:   mflr 0
+; PPC64LE-NEXT:   bl __xray_FunctionEntry
+; PPC64LE-NEXT:   nop
+; PPC64LE-NEXT:   mtlr 0
+; PPC64LE-NEXT:   .Ltmp1:
+; PPC64LE-NEXT:   blr
+; PPC64LE-NOT:    .section    __patchable_function_entries
+; PPC64LE:     .section        xray_instr_map
+; PPC64LE:     .section        xray_fn_idx
   ret void
 }
 
@@ -32,6 +55,17 @@ define void @f2() "patchable-function-entry"="1" {
 ; PPC64:       .p2align    3, 0x0
 ; PPC32-NEXT:  .long   .Lfunc_begin2
 ; PPC64-NEXT:  .quad   .Lfunc_begin2
+;
+; PPC64LE-LABEL: f2:
+; PPC64LE-NEXT:  .Lfunc_begin2:
+; PPC64LE:         # %bb.0:
+; PPC64LE-NEXT:    nop
+; PPC64LE-NEXT:    blr
+; PPC64LE:        .section    __patchable_function_entries
+; PPC64LE:        .p2align    3, 0x0
+; PPC64LE-NEXT:   .quad   .Lfunc_begin2
+; PPC64LE-NOT:    .section        xray_instr_map
+; PPC64LE-NOT:    .section        xray_fn_idx
   ret void
 }
 
@@ -52,6 +86,21 @@ define i32 @f3() "patchable-function-entry"="1" "patchable-function-prefix"="2"
 ; PPC64:       .p2align    3, 0x0
 ; PPC32-NEXT:  .long   .Ltmp0
 ; PPC64-NEXT:  .quad   .Ltmp0
+;
+; PPC64LE-LABEL:   .Ltmp3:
+; PPC64LE-COUNT-2: nop
+; PPC64LE-LABEL:   f3:
+; PPC64LE:         # %bb.0:
+; PPC64LE-NEXT:    nop
+; PPC64LE:         addis 3, 2, .LC0@toc@ha
+; PPC64LE-NEXT:    ld 3, .LC0@toc@l(3)
+; PPC64LE-NEXT:    lwz 3, 0(3)
+; PPC64LE:         blr
+; PPC64LE:        .section    __patchable_function_entries
+; PPC64LE:        .p2align    3, 0x0
+; PPC64LE-NEXT:   .quad   .Ltmp3
+; PPC64LE-NOT:    .section    xray_instr_map
+; PPC64LE-NOT:    .section    xray_fn_idx
 entry:
   %0 = load i32, ptr @a, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/RISCV/select-zbb.ll b/llvm/test/CodeGen/RISCV/select-zbb.ll
new file mode 100644
index 0000000..0af699a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/select-zbb.ll
@@ -0,0 +1,1614 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IM %s
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IM %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IMZBB %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IMZBB %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IMZICOND %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IMZICOND %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zicond,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IMBOTH %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zicond,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IMBOTH %s
+
+
+define i32 @select_umin_1(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_umin_1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    bgeu a1, a2, .LBB0_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB0_4
+; RV32IM-NEXT:  .LBB0_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB0_3: # %entry
+; RV32IM-NEXT:    mv a1, a2
+; RV32IM-NEXT:    bnez a0, .LBB0_2
+; RV32IM-NEXT:  .LBB0_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umin_1:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a3, a2
+; RV64IM-NEXT:    sext.w a1, a1
+; RV64IM-NEXT:    bgeu a1, a3, .LBB0_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB0_4
+; RV64IM-NEXT:  .LBB0_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB0_3: # %entry
+; RV64IM-NEXT:    mv a1, a3
+; RV64IM-NEXT:    bnez a0, .LBB0_2
+; RV64IM-NEXT:  .LBB0_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umin_1:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    beqz a0, .LBB0_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    minu a2, a1, a2
+; RV32IMZBB-NEXT:  .LBB0_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a2
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umin_1:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    beqz a0, .LBB0_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    minu a2, a1, a2
+; RV64IMZBB-NEXT:  .LBB0_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a2
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umin_1:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    sltu a3, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a4, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a3
+; RV32IMZICOND-NEXT:    or a1, a1, a4
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umin_1:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a3, a2
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    sltu a4, a1, a3
+; RV64IMZICOND-NEXT:    czero.nez a3, a3, a4
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a4
+; RV64IMZICOND-NEXT:    or a1, a1, a3
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umin_1:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    minu a1, a1, a2
+; RV32IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV32IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV32IMBOTH-NEXT:    or a0, a0, a2
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umin_1:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a3, a2
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    minu a1, a1, a3
+; RV64IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV64IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV64IMBOTH-NEXT:    or a0, a0, a2
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.umin(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %c, i32 %b
+  ret i32 %res
+}
+
+define i32 @select_umin_2(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_umin_2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    mv a3, a1
+; RV32IM-NEXT:    bgeu a1, a2, .LBB1_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB1_4
+; RV32IM-NEXT:  .LBB1_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB1_3: # %entry
+; RV32IM-NEXT:    mv a3, a2
+; RV32IM-NEXT:    bnez a0, .LBB1_2
+; RV32IM-NEXT:  .LBB1_4: # %entry
+; RV32IM-NEXT:    mv a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umin_2:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a3, a2
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    bgeu a2, a3, .LBB1_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB1_4
+; RV64IM-NEXT:  .LBB1_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB1_3: # %entry
+; RV64IM-NEXT:    mv a2, a3
+; RV64IM-NEXT:    bnez a0, .LBB1_2
+; RV64IM-NEXT:  .LBB1_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umin_2:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB1_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    minu a1, a1, a2
+; RV32IMZBB-NEXT:  .LBB1_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umin_2:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB1_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    minu a1, a1, a2
+; RV64IMZBB-NEXT:  .LBB1_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umin_2:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    sltu a3, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a3, a1, a3
+; RV32IMZICOND-NEXT:    or a2, a3, a2
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV32IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV32IMZICOND-NEXT:    or a0, a0, a2
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umin_2:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a2, a2
+; RV64IMZICOND-NEXT:    sext.w a3, a1
+; RV64IMZICOND-NEXT:    sltu a4, a3, a2
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a4
+; RV64IMZICOND-NEXT:    czero.eqz a3, a3, a4
+; RV64IMZICOND-NEXT:    or a2, a3, a2
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV64IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64IMZICOND-NEXT:    or a0, a0, a2
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umin_2:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    minu a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umin_2:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a2
+; RV64IMBOTH-NEXT:    sext.w a3, a1
+; RV64IMBOTH-NEXT:    minu a2, a3, a2
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.umin(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_umin_3(i1 zeroext %cond, i32 %a) {
+; RV32IM-LABEL: select_umin_3:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a3, 32
+; RV32IM-NEXT:    mv a2, a1
+; RV32IM-NEXT:    bgeu a1, a3, .LBB2_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB2_4
+; RV32IM-NEXT:  .LBB2_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB2_3: # %entry
+; RV32IM-NEXT:    li a2, 32
+; RV32IM-NEXT:    bnez a0, .LBB2_2
+; RV32IM-NEXT:  .LBB2_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umin_3:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a3, 32
+; RV64IM-NEXT:    bgeu a2, a3, .LBB2_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB2_4
+; RV64IM-NEXT:  .LBB2_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB2_3: # %entry
+; RV64IM-NEXT:    li a2, 32
+; RV64IM-NEXT:    bnez a0, .LBB2_2
+; RV64IM-NEXT:  .LBB2_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umin_3:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB2_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    li a0, 32
+; RV32IMZBB-NEXT:    minu a1, a1, a0
+; RV32IMZBB-NEXT:  .LBB2_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umin_3:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB2_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    li a0, 32
+; RV64IMZBB-NEXT:    minu a1, a1, a0
+; RV64IMZBB-NEXT:  .LBB2_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umin_3:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    sltiu a2, a1, 32
+; RV32IMZICOND-NEXT:    addi a3, a1, -32
+; RV32IMZICOND-NEXT:    czero.eqz a2, a3, a2
+; RV32IMZICOND-NEXT:    addi a2, a2, 32
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umin_3:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a2, a1
+; RV64IMZICOND-NEXT:    sltiu a3, a2, 32
+; RV64IMZICOND-NEXT:    addi a2, a2, -32
+; RV64IMZICOND-NEXT:    czero.eqz a2, a2, a3
+; RV64IMZICOND-NEXT:    addi a2, a2, 32
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umin_3:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    li a2, 32
+; RV32IMBOTH-NEXT:    minu a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umin_3:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a1
+; RV64IMBOTH-NEXT:    li a3, 32
+; RV64IMBOTH-NEXT:    minu a2, a2, a3
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.umin(i32 %a, i32 32)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_umin_4(i1 zeroext %cond, i32 %x) {
+; RV32IM-LABEL: select_umin_4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 128
+; RV32IM-NEXT:    bgeu a1, a2, .LBB3_3
+; RV32IM-NEXT:  # %bb.1:
+; RV32IM-NEXT:    beqz a0, .LBB3_4
+; RV32IM-NEXT:  .LBB3_2:
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB3_3:
+; RV32IM-NEXT:    li a1, 128
+; RV32IM-NEXT:    bnez a0, .LBB3_2
+; RV32IM-NEXT:  .LBB3_4:
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umin_4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a1, 128
+; RV64IM-NEXT:    bgeu a2, a1, .LBB3_3
+; RV64IM-NEXT:  # %bb.1:
+; RV64IM-NEXT:    beqz a0, .LBB3_4
+; RV64IM-NEXT:  .LBB3_2:
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB3_3:
+; RV64IM-NEXT:    li a2, 128
+; RV64IM-NEXT:    bnez a0, .LBB3_2
+; RV64IM-NEXT:  .LBB3_4:
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umin_4:
+; RV32IMZBB:       # %bb.0:
+; RV32IMZBB-NEXT:    mv a2, a0
+; RV32IMZBB-NEXT:    li a0, 128
+; RV32IMZBB-NEXT:    bnez a2, .LBB3_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    minu a0, a1, a0
+; RV32IMZBB-NEXT:  .LBB3_2:
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umin_4:
+; RV64IMZBB:       # %bb.0:
+; RV64IMZBB-NEXT:    mv a2, a0
+; RV64IMZBB-NEXT:    li a0, 128
+; RV64IMZBB-NEXT:    bnez a2, .LBB3_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    minu a0, a1, a0
+; RV64IMZBB-NEXT:  .LBB3_2:
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umin_4:
+; RV32IMZICOND:       # %bb.0:
+; RV32IMZICOND-NEXT:    sltiu a2, a1, 128
+; RV32IMZICOND-NEXT:    addi a1, a1, -128
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT:    addi a0, a0, 128
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umin_4:
+; RV64IMZICOND:       # %bb.0:
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    sltiu a2, a1, 128
+; RV64IMZICOND-NEXT:    addi a1, a1, -128
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a2
+; RV64IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT:    addi a0, a0, 128
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umin_4:
+; RV32IMBOTH:       # %bb.0:
+; RV32IMBOTH-NEXT:    li a2, 128
+; RV32IMBOTH-NEXT:    minu a1, a1, a2
+; RV32IMBOTH-NEXT:    addi a1, a1, -128
+; RV32IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV32IMBOTH-NEXT:    addi a0, a0, 128
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umin_4:
+; RV64IMBOTH:       # %bb.0:
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    li a2, 128
+; RV64IMBOTH-NEXT:    minu a1, a1, a2
+; RV64IMBOTH-NEXT:    addi a1, a1, -128
+; RV64IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV64IMBOTH-NEXT:    addi a0, a0, 128
+; RV64IMBOTH-NEXT:    ret
+  %minmax = call i32 @llvm.umin(i32 %x, i32 128)
+  %sel = select i1 %cond, i32 128, i32 %minmax
+  ret i32 %sel
+}
+
+define i32 @select_umax_1(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_umax_1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    bgeu a2, a1, .LBB4_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB4_4
+; RV32IM-NEXT:  .LBB4_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB4_3: # %entry
+; RV32IM-NEXT:    mv a1, a2
+; RV32IM-NEXT:    bnez a0, .LBB4_2
+; RV32IM-NEXT:  .LBB4_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umax_1:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a1, a1
+; RV64IM-NEXT:    sext.w a3, a2
+; RV64IM-NEXT:    bgeu a3, a1, .LBB4_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB4_4
+; RV64IM-NEXT:  .LBB4_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB4_3: # %entry
+; RV64IM-NEXT:    mv a1, a3
+; RV64IM-NEXT:    bnez a0, .LBB4_2
+; RV64IM-NEXT:  .LBB4_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umax_1:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    beqz a0, .LBB4_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    maxu a2, a1, a2
+; RV32IMZBB-NEXT:  .LBB4_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a2
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umax_1:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    beqz a0, .LBB4_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    maxu a2, a1, a2
+; RV64IMZBB-NEXT:  .LBB4_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a2
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umax_1:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    sltu a3, a2, a1
+; RV32IMZICOND-NEXT:    czero.nez a4, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a3
+; RV32IMZICOND-NEXT:    or a1, a1, a4
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umax_1:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    sext.w a3, a2
+; RV64IMZICOND-NEXT:    sltu a4, a3, a1
+; RV64IMZICOND-NEXT:    czero.nez a3, a3, a4
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a4
+; RV64IMZICOND-NEXT:    or a1, a1, a3
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umax_1:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    maxu a1, a1, a2
+; RV32IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV32IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV32IMBOTH-NEXT:    or a0, a0, a2
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umax_1:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a3, a2
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    maxu a1, a1, a3
+; RV64IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV64IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV64IMBOTH-NEXT:    or a0, a0, a2
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.umax(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %c, i32 %b
+  ret i32 %res
+}
+
+define i32 @select_umax_2(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_umax_2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    mv a3, a1
+; RV32IM-NEXT:    bgeu a2, a1, .LBB5_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB5_4
+; RV32IM-NEXT:  .LBB5_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB5_3: # %entry
+; RV32IM-NEXT:    mv a3, a2
+; RV32IM-NEXT:    bnez a0, .LBB5_2
+; RV32IM-NEXT:  .LBB5_4: # %entry
+; RV32IM-NEXT:    mv a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umax_2:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a3, a1
+; RV64IM-NEXT:    sext.w a2, a2
+; RV64IM-NEXT:    bgeu a2, a3, .LBB5_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB5_4
+; RV64IM-NEXT:  .LBB5_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB5_3: # %entry
+; RV64IM-NEXT:    mv a3, a2
+; RV64IM-NEXT:    bnez a0, .LBB5_2
+; RV64IM-NEXT:  .LBB5_4: # %entry
+; RV64IM-NEXT:    mv a0, a3
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umax_2:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB5_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    maxu a1, a1, a2
+; RV32IMZBB-NEXT:  .LBB5_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umax_2:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB5_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    maxu a1, a1, a2
+; RV64IMZBB-NEXT:  .LBB5_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umax_2:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    sltu a3, a2, a1
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a3, a1, a3
+; RV32IMZICOND-NEXT:    or a2, a3, a2
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV32IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV32IMZICOND-NEXT:    or a0, a0, a2
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umax_2:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a3, a1
+; RV64IMZICOND-NEXT:    sext.w a2, a2
+; RV64IMZICOND-NEXT:    sltu a4, a2, a3
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a4
+; RV64IMZICOND-NEXT:    czero.eqz a3, a3, a4
+; RV64IMZICOND-NEXT:    or a2, a3, a2
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV64IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64IMZICOND-NEXT:    or a0, a0, a2
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umax_2:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    maxu a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umax_2:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a2
+; RV64IMBOTH-NEXT:    sext.w a3, a1
+; RV64IMBOTH-NEXT:    maxu a2, a3, a2
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.umax(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_umax_3(i1 zeroext %cond, i32 %a) {
+; RV32IM-LABEL: select_umax_3:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a3, 32
+; RV32IM-NEXT:    mv a2, a1
+; RV32IM-NEXT:    bgeu a3, a1, .LBB6_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB6_4
+; RV32IM-NEXT:  .LBB6_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB6_3: # %entry
+; RV32IM-NEXT:    li a2, 32
+; RV32IM-NEXT:    bnez a0, .LBB6_2
+; RV32IM-NEXT:  .LBB6_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umax_3:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a3, 32
+; RV64IM-NEXT:    bgeu a3, a2, .LBB6_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB6_4
+; RV64IM-NEXT:  .LBB6_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB6_3: # %entry
+; RV64IM-NEXT:    li a2, 32
+; RV64IM-NEXT:    bnez a0, .LBB6_2
+; RV64IM-NEXT:  .LBB6_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umax_3:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB6_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    li a0, 32
+; RV32IMZBB-NEXT:    maxu a1, a1, a0
+; RV32IMZBB-NEXT:  .LBB6_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umax_3:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB6_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    li a0, 32
+; RV64IMZBB-NEXT:    maxu a1, a1, a0
+; RV64IMZBB-NEXT:  .LBB6_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umax_3:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    sltiu a2, a1, 33
+; RV32IMZICOND-NEXT:    addi a3, a1, -32
+; RV32IMZICOND-NEXT:    czero.nez a2, a3, a2
+; RV32IMZICOND-NEXT:    addi a2, a2, 32
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umax_3:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a2, a1
+; RV64IMZICOND-NEXT:    sltiu a3, a2, 33
+; RV64IMZICOND-NEXT:    addi a2, a2, -32
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a3
+; RV64IMZICOND-NEXT:    addi a2, a2, 32
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umax_3:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    li a2, 32
+; RV32IMBOTH-NEXT:    maxu a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umax_3:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a1
+; RV64IMBOTH-NEXT:    li a3, 32
+; RV64IMBOTH-NEXT:    maxu a2, a2, a3
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.umax(i32 %a, i32 32)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_umax_4(i1 zeroext %cond, i32 %x) {
+; RV32IM-LABEL: select_umax_4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 128
+; RV32IM-NEXT:    bgeu a2, a1, .LBB7_3
+; RV32IM-NEXT:  # %bb.1:
+; RV32IM-NEXT:    beqz a0, .LBB7_4
+; RV32IM-NEXT:  .LBB7_2:
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB7_3:
+; RV32IM-NEXT:    li a1, 128
+; RV32IM-NEXT:    bnez a0, .LBB7_2
+; RV32IM-NEXT:  .LBB7_4:
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_umax_4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a1, 128
+; RV64IM-NEXT:    bgeu a1, a2, .LBB7_3
+; RV64IM-NEXT:  # %bb.1:
+; RV64IM-NEXT:    beqz a0, .LBB7_4
+; RV64IM-NEXT:  .LBB7_2:
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB7_3:
+; RV64IM-NEXT:    li a2, 128
+; RV64IM-NEXT:    bnez a0, .LBB7_2
+; RV64IM-NEXT:  .LBB7_4:
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_umax_4:
+; RV32IMZBB:       # %bb.0:
+; RV32IMZBB-NEXT:    mv a2, a0
+; RV32IMZBB-NEXT:    li a0, 128
+; RV32IMZBB-NEXT:    bnez a2, .LBB7_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    maxu a0, a1, a0
+; RV32IMZBB-NEXT:  .LBB7_2:
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_umax_4:
+; RV64IMZBB:       # %bb.0:
+; RV64IMZBB-NEXT:    mv a2, a0
+; RV64IMZBB-NEXT:    li a0, 128
+; RV64IMZBB-NEXT:    bnez a2, .LBB7_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    maxu a0, a1, a0
+; RV64IMZBB-NEXT:  .LBB7_2:
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_umax_4:
+; RV32IMZICOND:       # %bb.0:
+; RV32IMZICOND-NEXT:    sltiu a2, a1, 129
+; RV32IMZICOND-NEXT:    addi a1, a1, -128
+; RV32IMZICOND-NEXT:    czero.nez a1, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT:    addi a0, a0, 128
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_umax_4:
+; RV64IMZICOND:       # %bb.0:
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    sltiu a2, a1, 129
+; RV64IMZICOND-NEXT:    addi a1, a1, -128
+; RV64IMZICOND-NEXT:    czero.nez a1, a1, a2
+; RV64IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT:    addi a0, a0, 128
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_umax_4:
+; RV32IMBOTH:       # %bb.0:
+; RV32IMBOTH-NEXT:    li a2, 128
+; RV32IMBOTH-NEXT:    maxu a1, a1, a2
+; RV32IMBOTH-NEXT:    addi a1, a1, -128
+; RV32IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV32IMBOTH-NEXT:    addi a0, a0, 128
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_umax_4:
+; RV64IMBOTH:       # %bb.0:
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    li a2, 128
+; RV64IMBOTH-NEXT:    maxu a1, a1, a2
+; RV64IMBOTH-NEXT:    addi a1, a1, -128
+; RV64IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV64IMBOTH-NEXT:    addi a0, a0, 128
+; RV64IMBOTH-NEXT:    ret
+  %minmax = call i32 @llvm.umax(i32 %x, i32 128)
+  %sel = select i1 %cond, i32 128, i32 %minmax
+  ret i32 %sel
+}
+
+define i32 @select_smin_1(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_smin_1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    bge a1, a2, .LBB8_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB8_4
+; RV32IM-NEXT:  .LBB8_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB8_3: # %entry
+; RV32IM-NEXT:    mv a1, a2
+; RV32IM-NEXT:    bnez a0, .LBB8_2
+; RV32IM-NEXT:  .LBB8_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smin_1:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a3, a2
+; RV64IM-NEXT:    sext.w a1, a1
+; RV64IM-NEXT:    bge a1, a3, .LBB8_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB8_4
+; RV64IM-NEXT:  .LBB8_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB8_3: # %entry
+; RV64IM-NEXT:    mv a1, a3
+; RV64IM-NEXT:    bnez a0, .LBB8_2
+; RV64IM-NEXT:  .LBB8_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smin_1:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    beqz a0, .LBB8_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    min a2, a1, a2
+; RV32IMZBB-NEXT:  .LBB8_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a2
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smin_1:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    beqz a0, .LBB8_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    min a2, a1, a2
+; RV64IMZBB-NEXT:  .LBB8_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a2
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smin_1:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    slt a3, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a4, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a3
+; RV32IMZICOND-NEXT:    or a1, a1, a4
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smin_1:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a3, a2
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    slt a4, a1, a3
+; RV64IMZICOND-NEXT:    czero.nez a3, a3, a4
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a4
+; RV64IMZICOND-NEXT:    or a1, a1, a3
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smin_1:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    min a1, a1, a2
+; RV32IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV32IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV32IMBOTH-NEXT:    or a0, a0, a2
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smin_1:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a3, a2
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    min a1, a1, a3
+; RV64IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV64IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV64IMBOTH-NEXT:    or a0, a0, a2
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.smin(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %c, i32 %b
+  ret i32 %res
+}
+
+define i32 @select_smin_2(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_smin_2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    mv a3, a1
+; RV32IM-NEXT:    bge a1, a2, .LBB9_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB9_4
+; RV32IM-NEXT:  .LBB9_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB9_3: # %entry
+; RV32IM-NEXT:    mv a3, a2
+; RV32IM-NEXT:    bnez a0, .LBB9_2
+; RV32IM-NEXT:  .LBB9_4: # %entry
+; RV32IM-NEXT:    mv a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smin_2:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a3, a2
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    bge a2, a3, .LBB9_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB9_4
+; RV64IM-NEXT:  .LBB9_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB9_3: # %entry
+; RV64IM-NEXT:    mv a2, a3
+; RV64IM-NEXT:    bnez a0, .LBB9_2
+; RV64IM-NEXT:  .LBB9_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smin_2:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB9_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    min a1, a1, a2
+; RV32IMZBB-NEXT:  .LBB9_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smin_2:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB9_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    min a1, a1, a2
+; RV64IMZBB-NEXT:  .LBB9_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smin_2:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    slt a3, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a3, a1, a3
+; RV32IMZICOND-NEXT:    or a2, a3, a2
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV32IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV32IMZICOND-NEXT:    or a0, a0, a2
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smin_2:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a2, a2
+; RV64IMZICOND-NEXT:    sext.w a3, a1
+; RV64IMZICOND-NEXT:    slt a4, a3, a2
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a4
+; RV64IMZICOND-NEXT:    czero.eqz a3, a3, a4
+; RV64IMZICOND-NEXT:    or a2, a3, a2
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV64IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64IMZICOND-NEXT:    or a0, a0, a2
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smin_2:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    min a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smin_2:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a2
+; RV64IMBOTH-NEXT:    sext.w a3, a1
+; RV64IMBOTH-NEXT:    min a2, a3, a2
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.smin(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_smin_3(i1 zeroext %cond, i32 %a) {
+; RV32IM-LABEL: select_smin_3:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a3, 32
+; RV32IM-NEXT:    mv a2, a1
+; RV32IM-NEXT:    bge a1, a3, .LBB10_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB10_4
+; RV32IM-NEXT:  .LBB10_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB10_3: # %entry
+; RV32IM-NEXT:    li a2, 32
+; RV32IM-NEXT:    bnez a0, .LBB10_2
+; RV32IM-NEXT:  .LBB10_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smin_3:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a3, 32
+; RV64IM-NEXT:    bge a2, a3, .LBB10_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB10_4
+; RV64IM-NEXT:  .LBB10_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB10_3: # %entry
+; RV64IM-NEXT:    li a2, 32
+; RV64IM-NEXT:    bnez a0, .LBB10_2
+; RV64IM-NEXT:  .LBB10_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smin_3:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB10_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    li a0, 32
+; RV32IMZBB-NEXT:    min a1, a1, a0
+; RV32IMZBB-NEXT:  .LBB10_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smin_3:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB10_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    li a0, 32
+; RV64IMZBB-NEXT:    min a1, a1, a0
+; RV64IMZBB-NEXT:  .LBB10_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smin_3:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    slti a2, a1, 32
+; RV32IMZICOND-NEXT:    addi a3, a1, -32
+; RV32IMZICOND-NEXT:    czero.eqz a2, a3, a2
+; RV32IMZICOND-NEXT:    addi a2, a2, 32
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smin_3:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a2, a1
+; RV64IMZICOND-NEXT:    slti a3, a2, 32
+; RV64IMZICOND-NEXT:    addi a2, a2, -32
+; RV64IMZICOND-NEXT:    czero.eqz a2, a2, a3
+; RV64IMZICOND-NEXT:    addi a2, a2, 32
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smin_3:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    li a2, 32
+; RV32IMBOTH-NEXT:    min a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smin_3:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a1
+; RV64IMBOTH-NEXT:    li a3, 32
+; RV64IMBOTH-NEXT:    min a2, a2, a3
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.smin(i32 %a, i32 32)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_smin_4(i1 zeroext %cond, i32 %x) {
+; RV32IM-LABEL: select_smin_4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 128
+; RV32IM-NEXT:    bge a1, a2, .LBB11_3
+; RV32IM-NEXT:  # %bb.1:
+; RV32IM-NEXT:    beqz a0, .LBB11_4
+; RV32IM-NEXT:  .LBB11_2:
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB11_3:
+; RV32IM-NEXT:    li a1, 128
+; RV32IM-NEXT:    bnez a0, .LBB11_2
+; RV32IM-NEXT:  .LBB11_4:
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smin_4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a1, 128
+; RV64IM-NEXT:    bge a2, a1, .LBB11_3
+; RV64IM-NEXT:  # %bb.1:
+; RV64IM-NEXT:    beqz a0, .LBB11_4
+; RV64IM-NEXT:  .LBB11_2:
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB11_3:
+; RV64IM-NEXT:    li a2, 128
+; RV64IM-NEXT:    bnez a0, .LBB11_2
+; RV64IM-NEXT:  .LBB11_4:
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smin_4:
+; RV32IMZBB:       # %bb.0:
+; RV32IMZBB-NEXT:    mv a2, a0
+; RV32IMZBB-NEXT:    li a0, 128
+; RV32IMZBB-NEXT:    bnez a2, .LBB11_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    min a0, a1, a0
+; RV32IMZBB-NEXT:  .LBB11_2:
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smin_4:
+; RV64IMZBB:       # %bb.0:
+; RV64IMZBB-NEXT:    mv a2, a0
+; RV64IMZBB-NEXT:    li a0, 128
+; RV64IMZBB-NEXT:    bnez a2, .LBB11_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    min a0, a1, a0
+; RV64IMZBB-NEXT:  .LBB11_2:
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smin_4:
+; RV32IMZICOND:       # %bb.0:
+; RV32IMZICOND-NEXT:    slti a2, a1, 128
+; RV32IMZICOND-NEXT:    addi a1, a1, -128
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT:    addi a0, a0, 128
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smin_4:
+; RV64IMZICOND:       # %bb.0:
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    slti a2, a1, 128
+; RV64IMZICOND-NEXT:    addi a1, a1, -128
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a2
+; RV64IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT:    addi a0, a0, 128
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smin_4:
+; RV32IMBOTH:       # %bb.0:
+; RV32IMBOTH-NEXT:    li a2, 128
+; RV32IMBOTH-NEXT:    min a1, a1, a2
+; RV32IMBOTH-NEXT:    addi a1, a1, -128
+; RV32IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV32IMBOTH-NEXT:    addi a0, a0, 128
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smin_4:
+; RV64IMBOTH:       # %bb.0:
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    li a2, 128
+; RV64IMBOTH-NEXT:    min a1, a1, a2
+; RV64IMBOTH-NEXT:    addi a1, a1, -128
+; RV64IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV64IMBOTH-NEXT:    addi a0, a0, 128
+; RV64IMBOTH-NEXT:    ret
+  %minmax = call i32 @llvm.smin(i32 %x, i32 128)
+  %sel = select i1 %cond, i32 128, i32 %minmax
+  ret i32 %sel
+}
+
+define i32 @select_smax_1(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_smax_1:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    bge a2, a1, .LBB12_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB12_4
+; RV32IM-NEXT:  .LBB12_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB12_3: # %entry
+; RV32IM-NEXT:    mv a1, a2
+; RV32IM-NEXT:    bnez a0, .LBB12_2
+; RV32IM-NEXT:  .LBB12_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smax_1:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a1, a1
+; RV64IM-NEXT:    sext.w a3, a2
+; RV64IM-NEXT:    bge a3, a1, .LBB12_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB12_4
+; RV64IM-NEXT:  .LBB12_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB12_3: # %entry
+; RV64IM-NEXT:    mv a1, a3
+; RV64IM-NEXT:    bnez a0, .LBB12_2
+; RV64IM-NEXT:  .LBB12_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smax_1:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    beqz a0, .LBB12_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    max a2, a1, a2
+; RV32IMZBB-NEXT:  .LBB12_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a2
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smax_1:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    beqz a0, .LBB12_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    max a2, a1, a2
+; RV64IMZBB-NEXT:  .LBB12_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a2
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smax_1:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    slt a3, a2, a1
+; RV32IMZICOND-NEXT:    czero.nez a4, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a3
+; RV32IMZICOND-NEXT:    or a1, a1, a4
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smax_1:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    sext.w a3, a2
+; RV64IMZICOND-NEXT:    slt a4, a3, a1
+; RV64IMZICOND-NEXT:    czero.nez a3, a3, a4
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a4
+; RV64IMZICOND-NEXT:    or a1, a1, a3
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smax_1:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    max a1, a1, a2
+; RV32IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV32IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV32IMBOTH-NEXT:    or a0, a0, a2
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smax_1:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a3, a2
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    max a1, a1, a3
+; RV64IMBOTH-NEXT:    czero.nez a2, a2, a0
+; RV64IMBOTH-NEXT:    czero.eqz a0, a1, a0
+; RV64IMBOTH-NEXT:    or a0, a0, a2
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.smax(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %c, i32 %b
+  ret i32 %res
+}
+
+define i32 @select_smax_2(i1 zeroext %cond, i32 %a, i32 %b) {
+; RV32IM-LABEL: select_smax_2:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    mv a3, a1
+; RV32IM-NEXT:    bge a2, a1, .LBB13_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB13_4
+; RV32IM-NEXT:  .LBB13_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB13_3: # %entry
+; RV32IM-NEXT:    mv a3, a2
+; RV32IM-NEXT:    bnez a0, .LBB13_2
+; RV32IM-NEXT:  .LBB13_4: # %entry
+; RV32IM-NEXT:    mv a0, a3
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smax_2:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a3, a1
+; RV64IM-NEXT:    sext.w a2, a2
+; RV64IM-NEXT:    bge a2, a3, .LBB13_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB13_4
+; RV64IM-NEXT:  .LBB13_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB13_3: # %entry
+; RV64IM-NEXT:    mv a3, a2
+; RV64IM-NEXT:    bnez a0, .LBB13_2
+; RV64IM-NEXT:  .LBB13_4: # %entry
+; RV64IM-NEXT:    mv a0, a3
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smax_2:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB13_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    max a1, a1, a2
+; RV32IMZBB-NEXT:  .LBB13_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smax_2:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB13_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a2, a2
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    max a1, a1, a2
+; RV64IMZBB-NEXT:  .LBB13_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smax_2:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    slt a3, a2, a1
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a3
+; RV32IMZICOND-NEXT:    czero.eqz a3, a1, a3
+; RV32IMZICOND-NEXT:    or a2, a3, a2
+; RV32IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV32IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV32IMZICOND-NEXT:    or a0, a0, a2
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smax_2:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a3, a1
+; RV64IMZICOND-NEXT:    sext.w a2, a2
+; RV64IMZICOND-NEXT:    slt a4, a2, a3
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a4
+; RV64IMZICOND-NEXT:    czero.eqz a3, a3, a4
+; RV64IMZICOND-NEXT:    or a2, a3, a2
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a0
+; RV64IMZICOND-NEXT:    czero.eqz a0, a1, a0
+; RV64IMZICOND-NEXT:    or a0, a0, a2
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smax_2:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    max a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smax_2:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a2
+; RV64IMBOTH-NEXT:    sext.w a3, a1
+; RV64IMBOTH-NEXT:    max a2, a3, a2
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.smax(i32 %a, i32 %b)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_smax_3(i1 zeroext %cond, i32 %a) {
+; RV32IM-LABEL: select_smax_3:
+; RV32IM:       # %bb.0: # %entry
+; RV32IM-NEXT:    li a3, 32
+; RV32IM-NEXT:    mv a2, a1
+; RV32IM-NEXT:    bge a3, a1, .LBB14_3
+; RV32IM-NEXT:  # %bb.1: # %entry
+; RV32IM-NEXT:    beqz a0, .LBB14_4
+; RV32IM-NEXT:  .LBB14_2: # %entry
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB14_3: # %entry
+; RV32IM-NEXT:    li a2, 32
+; RV32IM-NEXT:    bnez a0, .LBB14_2
+; RV32IM-NEXT:  .LBB14_4: # %entry
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smax_3:
+; RV64IM:       # %bb.0: # %entry
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a3, 32
+; RV64IM-NEXT:    bge a3, a2, .LBB14_3
+; RV64IM-NEXT:  # %bb.1: # %entry
+; RV64IM-NEXT:    beqz a0, .LBB14_4
+; RV64IM-NEXT:  .LBB14_2: # %entry
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB14_3: # %entry
+; RV64IM-NEXT:    li a2, 32
+; RV64IM-NEXT:    bnez a0, .LBB14_2
+; RV64IM-NEXT:  .LBB14_4: # %entry
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smax_3:
+; RV32IMZBB:       # %bb.0: # %entry
+; RV32IMZBB-NEXT:    bnez a0, .LBB14_2
+; RV32IMZBB-NEXT:  # %bb.1: # %entry
+; RV32IMZBB-NEXT:    li a0, 32
+; RV32IMZBB-NEXT:    max a1, a1, a0
+; RV32IMZBB-NEXT:  .LBB14_2: # %entry
+; RV32IMZBB-NEXT:    mv a0, a1
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smax_3:
+; RV64IMZBB:       # %bb.0: # %entry
+; RV64IMZBB-NEXT:    bnez a0, .LBB14_2
+; RV64IMZBB-NEXT:  # %bb.1: # %entry
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    li a0, 32
+; RV64IMZBB-NEXT:    max a1, a1, a0
+; RV64IMZBB-NEXT:  .LBB14_2: # %entry
+; RV64IMZBB-NEXT:    mv a0, a1
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smax_3:
+; RV32IMZICOND:       # %bb.0: # %entry
+; RV32IMZICOND-NEXT:    slti a2, a1, 33
+; RV32IMZICOND-NEXT:    addi a3, a1, -32
+; RV32IMZICOND-NEXT:    czero.nez a2, a3, a2
+; RV32IMZICOND-NEXT:    addi a2, a2, 32
+; RV32IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV32IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV32IMZICOND-NEXT:    or a0, a1, a0
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smax_3:
+; RV64IMZICOND:       # %bb.0: # %entry
+; RV64IMZICOND-NEXT:    sext.w a2, a1
+; RV64IMZICOND-NEXT:    slti a3, a2, 33
+; RV64IMZICOND-NEXT:    addi a2, a2, -32
+; RV64IMZICOND-NEXT:    czero.nez a2, a2, a3
+; RV64IMZICOND-NEXT:    addi a2, a2, 32
+; RV64IMZICOND-NEXT:    czero.eqz a1, a1, a0
+; RV64IMZICOND-NEXT:    czero.nez a0, a2, a0
+; RV64IMZICOND-NEXT:    or a0, a1, a0
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smax_3:
+; RV32IMBOTH:       # %bb.0: # %entry
+; RV32IMBOTH-NEXT:    li a2, 32
+; RV32IMBOTH-NEXT:    max a2, a1, a2
+; RV32IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV32IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV32IMBOTH-NEXT:    or a0, a1, a0
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smax_3:
+; RV64IMBOTH:       # %bb.0: # %entry
+; RV64IMBOTH-NEXT:    sext.w a2, a1
+; RV64IMBOTH-NEXT:    li a3, 32
+; RV64IMBOTH-NEXT:    max a2, a2, a3
+; RV64IMBOTH-NEXT:    czero.eqz a1, a1, a0
+; RV64IMBOTH-NEXT:    czero.nez a0, a2, a0
+; RV64IMBOTH-NEXT:    or a0, a1, a0
+; RV64IMBOTH-NEXT:    ret
+entry:
+  %c = call i32 @llvm.smax(i32 %a, i32 32)
+  %res = select i1 %cond, i32 %a, i32 %c
+  ret i32 %res
+}
+
+define i32 @select_smax_4(i1 zeroext %cond, i32 %x) {
+; RV32IM-LABEL: select_smax_4:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 128
+; RV32IM-NEXT:    bge a2, a1, .LBB15_3
+; RV32IM-NEXT:  # %bb.1:
+; RV32IM-NEXT:    beqz a0, .LBB15_4
+; RV32IM-NEXT:  .LBB15_2:
+; RV32IM-NEXT:    mv a0, a2
+; RV32IM-NEXT:    ret
+; RV32IM-NEXT:  .LBB15_3:
+; RV32IM-NEXT:    li a1, 128
+; RV32IM-NEXT:    bnez a0, .LBB15_2
+; RV32IM-NEXT:  .LBB15_4:
+; RV32IM-NEXT:    mv a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64IM-LABEL: select_smax_4:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    sext.w a2, a1
+; RV64IM-NEXT:    li a1, 128
+; RV64IM-NEXT:    bge a1, a2, .LBB15_3
+; RV64IM-NEXT:  # %bb.1:
+; RV64IM-NEXT:    beqz a0, .LBB15_4
+; RV64IM-NEXT:  .LBB15_2:
+; RV64IM-NEXT:    mv a0, a1
+; RV64IM-NEXT:    ret
+; RV64IM-NEXT:  .LBB15_3:
+; RV64IM-NEXT:    li a2, 128
+; RV64IM-NEXT:    bnez a0, .LBB15_2
+; RV64IM-NEXT:  .LBB15_4:
+; RV64IM-NEXT:    mv a0, a2
+; RV64IM-NEXT:    ret
+;
+; RV32IMZBB-LABEL: select_smax_4:
+; RV32IMZBB:       # %bb.0:
+; RV32IMZBB-NEXT:    mv a2, a0
+; RV32IMZBB-NEXT:    li a0, 128
+; RV32IMZBB-NEXT:    bnez a2, .LBB15_2
+; RV32IMZBB-NEXT:  # %bb.1:
+; RV32IMZBB-NEXT:    max a0, a1, a0
+; RV32IMZBB-NEXT:  .LBB15_2:
+; RV32IMZBB-NEXT:    ret
+;
+; RV64IMZBB-LABEL: select_smax_4:
+; RV64IMZBB:       # %bb.0:
+; RV64IMZBB-NEXT:    mv a2, a0
+; RV64IMZBB-NEXT:    li a0, 128
+; RV64IMZBB-NEXT:    bnez a2, .LBB15_2
+; RV64IMZBB-NEXT:  # %bb.1:
+; RV64IMZBB-NEXT:    sext.w a1, a1
+; RV64IMZBB-NEXT:    max a0, a1, a0
+; RV64IMZBB-NEXT:  .LBB15_2:
+; RV64IMZBB-NEXT:    ret
+;
+; RV32IMZICOND-LABEL: select_smax_4:
+; RV32IMZICOND:       # %bb.0:
+; RV32IMZICOND-NEXT:    slti a2, a1, 129
+; RV32IMZICOND-NEXT:    addi a1, a1, -128
+; RV32IMZICOND-NEXT:    czero.nez a1, a1, a2
+; RV32IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT:    addi a0, a0, 128
+; RV32IMZICOND-NEXT:    ret
+;
+; RV64IMZICOND-LABEL: select_smax_4:
+; RV64IMZICOND:       # %bb.0:
+; RV64IMZICOND-NEXT:    sext.w a1, a1
+; RV64IMZICOND-NEXT:    slti a2, a1, 129
+; RV64IMZICOND-NEXT:    addi a1, a1, -128
+; RV64IMZICOND-NEXT:    czero.nez a1, a1, a2
+; RV64IMZICOND-NEXT:    czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT:    addi a0, a0, 128
+; RV64IMZICOND-NEXT:    ret
+;
+; RV32IMBOTH-LABEL: select_smax_4:
+; RV32IMBOTH:       # %bb.0:
+; RV32IMBOTH-NEXT:    li a2, 128
+; RV32IMBOTH-NEXT:    max a1, a1, a2
+; RV32IMBOTH-NEXT:    addi a1, a1, -128
+; RV32IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV32IMBOTH-NEXT:    addi a0, a0, 128
+; RV32IMBOTH-NEXT:    ret
+;
+; RV64IMBOTH-LABEL: select_smax_4:
+; RV64IMBOTH:       # %bb.0:
+; RV64IMBOTH-NEXT:    sext.w a1, a1
+; RV64IMBOTH-NEXT:    li a2, 128
+; RV64IMBOTH-NEXT:    max a1, a1, a2
+; RV64IMBOTH-NEXT:    addi a1, a1, -128
+; RV64IMBOTH-NEXT:    czero.nez a0, a1, a0
+; RV64IMBOTH-NEXT:    addi a0, a0, 128
+; RV64IMBOTH-NEXT:    ret
+  %minmax = call i32 @llvm.smax(i32 %x, i32 128)
+  %sel = select i1 %cond, i32 128, i32 %minmax
+  ret i32 %sel
+}
diff --git a/llvm/test/FileCheck/var-scope.txt b/llvm/test/FileCheck/var-scope.txt
index 9b3ea0e..b65eddb6 100644
--- a/llvm/test/FileCheck/var-scope.txt
+++ b/llvm/test/FileCheck/var-scope.txt
@@ -3,15 +3,15 @@
 
 ; Reference run: variables remain defined at all time when not using
 ; --enable-var-scope option.
-RUN: FileCheck --check-prefixes CHECK,LOCAL3,GLOBAL --input-file %s %s
+RUN: FileCheck --check-prefixes CHECK,CHECK-LOCAL-BOTH,CHECK-GLOBAL --input-file %s %s
 
-RUN: FileCheck --check-prefixes CHECK,GLOBAL --enable-var-scope --input-file %s %s
-RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,LOCAL1 --enable-var-scope --input-file %s %s 2>&1 \
-RUN:   | FileCheck --check-prefix ERRUNDEFLOCAL %s
-RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,LOCAL2 --enable-var-scope --input-file %s %s 2>&1 \
-RUN:   | FileCheck --check-prefix ERRUNDEFLOCNUM %s
-RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,LOCAL3 --enable-var-scope --input-file %s %s 2>&1 \
-RUN:   | FileCheck --check-prefixes ERRUNDEFLOCAL,ERRUNDEFLOCNUM %s
+RUN: FileCheck --check-prefixes CHECK,CHECK-GLOBAL --enable-var-scope --input-file %s %s
+RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,CHECK-LOCAL-TEXT --enable-var-scope --input-file %s %s 2>&1 \
+RUN:   | FileCheck --implicit-check-not "undefined variable:" --check-prefixes ERRUNDEF,ERRUNDEF-LOCAL %s
+RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,CHECK-LOCAL-NUM --enable-var-scope --input-file %s %s 2>&1 \
+RUN:   | FileCheck --implicit-check-not "undefined variable:" --check-prefixes ERRUNDEF,ERRUNDEF-LOCNUM %s
+RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,CHECK-LOCAL-BOTH --enable-var-scope --input-file %s %s 2>&1 \
+RUN:   | FileCheck --implicit-check-not "undefined variable:" --check-prefixes ERRUNDEF,ERRUNDEF-LOCAL,ERRUNDEF-LOCNUM %s
 
 local1
 global1
@@ -23,15 +23,47 @@ global2
 CHECK: [[LOCAL]][[#LOCNUM+1]]
 CHECK: [[$GLOBAL]][[#$GLOBNUM+1]]
 
-barrier:
-CHECK-LABEL: barrier
+// Barrier to clear local variables
+barrier1:
+CHECK-LABEL: barrier1
 
 local3
 global3
-LOCAL1: [[LOCAL]]3
-LOCAL2: local[[#LOCNUM+2]]
-LOCAL3: [[LOCAL]][[#LOCNUM+2]]
-GLOBAL: [[$GLOBAL]][[#$GLOBNUM+2]]
+CHECK-LOCAL-TEXT: [[LOCAL]]3
+CHECK-LOCAL-NUM: local[[#LOCNUM+2]]
+CHECK-LOCAL-BOTH: [[LOCAL]][[#LOCNUM+2]]
+CHECK-GLOBAL: [[$GLOBAL]][[#$GLOBNUM+2]]
 
-ERRUNDEFLOCAL: undefined variable: LOCAL
-ERRUNDEFLOCNUM: undefined variable: LOCNUM
+// Barrier to continue FileCheck execution even after the first failure
+barrier2:
+CHECK-LABEL: barrier2
+
+// Reassign the variables to check that clearing-after-reassigning works
+local4
+global4
+CHECK: [[LOCAL:loc[^[:digit:]]*]][[#LOCNUM:]]
+CHECK: [[$GLOBAL:glo[^[:digit:]]*]][[#$GLOBNUM:]]
+
+// Barrier to clear local variables
+barrier3:
+CHECK-LABEL: barrier3
+
+local5
+global5
+CHECK-LOCAL-TEXT: [[LOCAL]]5
+CHECK-LOCAL-NUM: local[[#LOCNUM+1]]
+CHECK-LOCAL-BOTH: [[LOCAL]][[#LOCNUM+1]]
+CHECK-GLOBAL: [[$GLOBAL]][[#$GLOBNUM+1]]
+
+
+// Check that the tests fail as expected
+ERRUNDEF-LOCAL: undefined variable: LOCAL
+ERRUNDEF-LOCNUM: undefined variable: LOCNUM
+ERRUNDEF-LOCAL: undefined variable: LOCAL
+ERRUNDEF-LOCNUM: undefined variable: LOCNUM
+
+// Look for "Input was:" to only match the error messages before the input-context.
+//
+// The regex /([[:space:]]|.)*/ matches all remaining characters,
+// to avoid failures due to --implicit-check-not
+ERRUNDEF: {{^Input was:([[:space:]]|.)*}}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
index 9181cce6..9ee5484 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
@@ -97,14 +97,12 @@ define i8 @pr141968(i1 %cond, i8 %v) {
 ; CHECK:       [[PRED_SDIV_IF29]]:
 ; CHECK-NEXT:    br label %[[PRED_SDIV_CONTINUE30]]
 ; CHECK:       [[PRED_SDIV_CONTINUE30]]:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT31:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT32:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT31]], <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <16 x i1> [[BROADCAST_SPLAT]], <16 x i8> zeroinitializer, <16 x i8> [[BROADCAST_SPLAT32]]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <16 x i1> [[BROADCAST_SPLAT]], i32 0
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP18]], i8 0, i8 [[V]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
@@ -121,7 +119,7 @@ define i8 @pr141968(i1 %cond, i8 %v) {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i8 [[IV_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RET_LCSSA:%.*]] = phi i8 [ [[RET]], %[[LOOP_LATCH]] ], [ [[TMP18]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[RET_LCSSA:%.*]] = phi i8 [ [[RET]], %[[LOOP_LATCH]] ], [ [[PREDPHI]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i8 [[RET_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll
index fe7d725..0c3a7c6 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll
@@ -69,6 +69,7 @@ define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i3
   %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
   %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
   %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n)
+  %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n)
   ret void
 }
 
@@ -113,6 +114,7 @@ define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1,
   %rF = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
   %r10 = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
   %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n)
+  %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n)
   ret void
 }
 
@@ -325,6 +327,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; LEGAL_LEGAL-NEXT:   %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
 ; LEGAL_LEGAL-NEXT:   %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
 ; LEGAL_LEGAL-NEXT:   %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n)
+; LEGAL_LEGAL-NEXT:   %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n)
 ; LEGAL_LEGAL-NEXT:   ret void
 
 ; LEGAL_LEGAL:define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
@@ -346,6 +349,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; LEGAL_LEGAL-NEXT:  %rF = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
 ; LEGAL_LEGAL-NEXT:  %r10 = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
 ; LEGAL_LEGAL-NEXT:  %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n)
+; LEGAL_LEGAL-NEXT:  %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n)
 ; LEGAL_LEGAL-NEXT:  ret void
 
 ; LEGAL_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
@@ -424,6 +428,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; DISCARD_LEGAL-NEXT:   [[EVLMASK2:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT2]]
 ; DISCARD_LEGAL-NEXT:   [[NEWMASK2:%.+]] = and <8 x i1> [[EVLMASK2]], %m
 ; DISCARD_LEGAL-NEXT:   %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> [[NEWMASK2]], <8 x i32> %i0, <8 x i32> %i1, i32 8)
+; DISCARD_LEGAL-NEXT:   %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 8)
 ; DISCARD_LEGAL-NEXT:   ret void
 
 ; TODO compute vscale only once and use caching.
@@ -441,6 +446,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; DISCARD_LEGAL:      %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size{{.*}})
 ; DISCARD_LEGAL-NOT:  %{{.+}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
 ; DISCARD_LEGAL:      %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}})
+; DISCARD_LEGAL:      %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}})
 ; DISCARD_LEGAL-NEXT: ret void
 
 ; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
@@ -514,6 +520,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; CONVERT_LEGAL-NOT:   %{{.+}} = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
 ; CONVERT_LEGAL-NOT:   %{{.+}} = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
 ; CONVERT_LEGAL:        %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %{{.*}}, <8 x i32> %i0, <8 x i32> %i1, i32 8)
+; CONVERT_LEGAL:        %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %{{.*}}, <8 x i32> %i0, <8 x i32> %i1, i32 8)
 ; CONVERT_LEGAL:       ret void
 
 ; Similar to %evl discard, %mask legal but make sure the first VP intrinsic has a legal expansion
@@ -525,6 +532,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; CONVERT_LEGAL-NEXT:   %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size)
 ; CONVERT_LEGAL-NOT:    %{{.*}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
 ; CONVERT_LEGAL:        %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}})
+; CONVERT_LEGAL:        %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}})
 ; CONVERT_LEGAL:        ret void
 
 ; CONVERT_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {