88 files changed, 2696 insertions, 595 deletions
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py
index 5026c29..36c9585 100644
--- a/.ci/generate_test_report_lib.py
+++ b/.ci/generate_test_report_lib.py
@@ -98,6 +98,23 @@ def _format_ninja_failures(ninja_failures: list[tuple[str, str]]) -> list[str]:
         )
     return output
 
+def get_failures(junit_objects) -> dict[str, list[tuple[str, str]]]:
+    failures = {}
+    for results in junit_objects:
+        for testsuite in results:
+            for test in testsuite:
+                if (
+                    not test.is_passed
+                    and test.result
+                    and isinstance(test.result[0], Failure)
+                ):
+                    if failures.get(testsuite.name) is None:
+                        failures[testsuite.name] = []
+                    failures[testsuite.name].append(
+                        (test.classname + "/" + test.name, test.result[0].text)
+                    )
+    return failures
+
 
 # Set size_limit to limit the byte size of the report. The default is 1MB as this
 # is the most that can be put into an annotation. If the generated report exceeds
@@ -113,7 +130,7 @@ def generate_report(
     size_limit=1024 * 1024,
     list_failures=True,
 ):
-    failures = {}
+    failures = get_failures(junit_objects)
     tests_run = 0
     tests_skipped = 0
     tests_failed = 0
@@ -124,18 +141,6 @@ def generate_report(
             tests_skipped += testsuite.skipped
             tests_failed += testsuite.failures
 
-            for test in testsuite:
-                if (
-                    not test.is_passed
-                    and test.result
-                    and isinstance(test.result[0], Failure)
-                ):
-                    if failures.get(testsuite.name) is None:
-                        failures[testsuite.name] = []
-                    failures[testsuite.name].append(
-                        (test.classname + "/" + test.name, test.result[0].text)
-                    )
-
     report = [f"# {title}", ""]
 
     if tests_run == 0:
@@ -258,7 +263,7 @@ def generate_report(
     return report
 
 
-def generate_report_from_files(title, return_code, build_log_files):
+def load_info_from_files(build_log_files):
     junit_files = [
         junit_file for junit_file in build_log_files if junit_file.endswith(".xml")
     ]
@@ -271,6 +276,9 @@ def generate_report_from_files(title, return_code, build_log_files):
             ninja_logs.append(
                 [log_line.strip() for log_line in ninja_log_file_handle.readlines()]
             )
-    return generate_report(
-        title, return_code, [JUnitXml.fromfile(p) for p in junit_files], ninja_logs
-    )
+    return [JUnitXml.fromfile(p) for p in junit_files], ninja_logs
+
+
+def generate_report_from_files(title, return_code, build_log_files):
+    junit_objects, ninja_logs = load_info_from_files(build_log_files)
+    return generate_report(title, return_code, junit_objects, ninja_logs)
diff --git a/clang/docs/PointerAuthentication.rst b/clang/docs/PointerAuthentication.rst
index 96eb498..7e65f4b 100644
--- a/clang/docs/PointerAuthentication.rst
+++ b/clang/docs/PointerAuthentication.rst
@@ -592,6 +592,36 @@ The result value is never zero and always within range for both the
 
 This can be used in constant expressions.
 
+``ptrauth_type_discriminator``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_type_discriminator(type)
+
+Compute the constant discriminator derived from the given type, as is computed
+for automatically type diversified schemas.
+
+``type`` must be a type. The result has the type ``ptrauth_extra_data_t``.
+
+This can be used in constant expressions.
+
+``ptrauth_function_pointer_type_discriminator``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  ptrauth_function_pointer_type_discriminator(function_type)
+
+Compute the constant discriminator derived from the provided function type, for
+use in contexts where the default function authentication schema. If function
+pointer type diversity is enabled, this is equivalent to
+`ptrauth_type_discriminator(function_type)`, if it is not enabled this is `0`.
+
+``function_type`` must be a function type. The result has the type ``ptrauth_extra_data_t``.
+
+This can be used in constant expressions.
+
 ``ptrauth_strip``
 ^^^^^^^^^^^^^^^^^
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index edb872c..79ff821 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -191,6 +191,8 @@ C23 Feature Support
 - Added ``FLT_SNAN``, ``DBL_SNAN``, and ``LDBL_SNAN`` to Clang's ``<float.h>``
   header in C23 and later modes. This implements
   `WG14 N2710 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2710.htm>`_.
+- Fixed accepting as compatible unnamed tag types with the same fields within
+  the same translation unit but from different types.
 
 Non-comprehensive list of changes in this release
 -------------------------------------------------
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a8b41ba..3603e9cd 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11581,6 +11581,12 @@ QualType ASTContext::mergeTagDefinitions(QualType LHS, QualType RHS) {
   if (LangOpts.CPlusPlus || !LangOpts.C23)
     return {};
 
+  // Nameless tags are comparable only within outer definitions. At the top
+  // level they are not comparable.
+  const TagDecl *LTagD = LHS->castAsTagDecl(), *RTagD = RHS->castAsTagDecl();
+  if (!LTagD->getIdentifier() || !RTagD->getIdentifier())
+    return {};
+
   // C23, on the other hand, requires the members to be "the same enough", so
   // we use a structural equivalence check.
   StructuralEquivalenceContext::NonEquivalentDeclSet NonEquivalentDecls;
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index 1557346..b17cd6f 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -1763,19 +1763,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
   // another anonymous structure or union, respectively, if their members
   // fulfill the preceding requirements. ... Otherwise, the structure, union,
   // or enumerated types are incompatible.
-
-  // Note: "the same tag" refers to the identifier for the structure; two
-  // structures without names are not compatible within a TU. In C23, if either
-  // declaration has no name, they're not equivalent. However, the paragraph
-  // after the bulleted list goes on to talk about compatibility of anonymous
-  // structure and union members, so this prohibition only applies to top-level
-  // declarations; if either declaration is not a member, they cannot be
-  // compatible.
-  if (Context.LangOpts.C23 && (!D1->getIdentifier() || !D2->getIdentifier()) &&
-      (!D1->getDeclContext()->isRecord() || !D2->getDeclContext()->isRecord()))
-    return false;
-
-  // Otherwise, check the names for equivalence.
   if (!NameIsStructurallyEquivalent(*D1, *D2))
     return false;
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index cfd48a2..5ba64dd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -536,7 +536,7 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) {
   mlir::Block *currBlock = builder.getBlock();
   mlir::Block *labelBlock = currBlock;
 
-  if (!currBlock->empty()) {
+  if (!currBlock->empty() || currBlock->isEntryBlock()) {
     {
       mlir::OpBuilder::InsertionGuard guard(builder);
       labelBlock = builder.createBlock(builder.getBlock()->getParent());
diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
index f902ca1..ad28f06 100644
--- a/clang/lib/Headers/ptrauth.h
+++ b/clang/lib/Headers/ptrauth.h
@@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
 #define ptrauth_type_discriminator(__type)                                     \
   __builtin_ptrauth_type_discriminator(__type)
 
+/* Compute the constant discriminator used by Clang to sign pointers with the
+   given C function pointer type.
+
+   A call to this function is an integer constant expression. */
+#if __has_feature(ptrauth_function_pointer_type_discrimination)
+#define ptrauth_function_pointer_type_discriminator(__type)                    \
+  __builtin_ptrauth_type_discriminator(__type)
+#else
+#define ptrauth_function_pointer_type_discriminator(__type)                    \
+  ((ptrauth_extra_data_t)0)
+#endif
+
 /* Compute a signature for the given pair of pointer-sized values.
    The order of the arguments is significant.
 
@@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
   })
 
 #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0)
+#define ptrauth_function_pointer_type_discriminator(__type)                    \
+  ((ptrauth_extra_data_t)0)
 
 #define ptrauth_sign_generic_data(__value, __data)                             \
   ({                                                                           \
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index 66cfccb..c1a5000 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -26,6 +26,7 @@ bool tryToFindPtrOrigin(
     const Expr *E, bool StopAtFirstRefCountedObj,
     std::function<bool(const clang::CXXRecordDecl *)> isSafePtr,
     std::function<bool(const clang::QualType)> isSafePtrType,
+    std::function<bool(const clang::Decl *)> isSafeGlobalDecl,
     std::function<bool(const clang::Expr *, bool)> callback) {
   while (E) {
     if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
@@ -34,6 +35,8 @@ bool tryToFindPtrOrigin(
         auto IsImmortal = safeGetName(VD) == "NSApp";
         if (VD->hasGlobalStorage() && (IsImmortal || QT.isConstQualified()))
           return callback(E, true);
+        if (VD->hasGlobalStorage() && isSafeGlobalDecl(VD))
+          return callback(E, true);
       }
     }
     if (auto *tempExpr = dyn_cast<MaterializeTemporaryExpr>(E)) {
@@ -71,9 +74,11 @@ bool tryToFindPtrOrigin(
     }
     if (auto *Expr = dyn_cast<ConditionalOperator>(E)) {
       return tryToFindPtrOrigin(Expr->getTrueExpr(), StopAtFirstRefCountedObj,
-                                isSafePtr, isSafePtrType, callback) &&
+                                isSafePtr, isSafePtrType, isSafeGlobalDecl,
+                                callback) &&
              tryToFindPtrOrigin(Expr->getFalseExpr(), StopAtFirstRefCountedObj,
-                                isSafePtr, isSafePtrType, callback);
+                                isSafePtr, isSafePtrType, isSafeGlobalDecl,
+                                callback);
     }
     if (auto *cast = dyn_cast<CastExpr>(E)) {
       if (StopAtFirstRefCountedObj) {
@@ -93,7 +98,8 @@ bool tryToFindPtrOrigin(
     if (auto *call = dyn_cast<CallExpr>(E)) {
       if (auto *Callee = call->getCalleeDecl()) {
         if (Callee->hasAttr<CFReturnsRetainedAttr>() ||
-            Callee->hasAttr<NSReturnsRetainedAttr>()) {
+            Callee->hasAttr<NSReturnsRetainedAttr>() ||
+            Callee->hasAttr<NSReturnsAutoreleasedAttr>()) {
           return callback(E, true);
         }
       }
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
index 3a009d6..9fff456 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
@@ -56,6 +56,7 @@ bool tryToFindPtrOrigin(
     const clang::Expr *E, bool StopAtFirstRefCountedObj,
     std::function<bool(const clang::CXXRecordDecl *)> isSafePtr,
     std::function<bool(const clang::QualType)> isSafePtrType,
+    std::function<bool(const clang::Decl *)> isSafeGlobalDecl,
     std::function<bool(const clang::Expr *, bool)> callback);
 
 /// For \p E referring to a ref-countable/-counted pointer/reference we return
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp
index 9585ceb..791e709 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefCallArgsChecker.cpp
@@ -29,12 +29,12 @@ namespace {
 class RawPtrRefCallArgsChecker
     : public Checker<check::ASTDecl<TranslationUnitDecl>> {
   BugType Bug;
-  mutable BugReporter *BR;
 
   TrivialFunctionAnalysis TFA;
   EnsureFunctionAnalysis EFA;
 
 protected:
+  mutable BugReporter *BR;
   mutable std::optional<RetainTypeChecker> RTC;
 
 public:
@@ -46,6 +46,7 @@ public:
   virtual bool isSafePtr(const CXXRecordDecl *Record) const = 0;
   virtual bool isSafePtrType(const QualType type) const = 0;
   virtual bool isSafeExpr(const Expr *) const { return false; }
+  virtual bool isSafeDecl(const Decl *) const { return false; }
   virtual const char *ptrKind() const = 0;
 
   void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
@@ -214,6 +215,7 @@ public:
         Arg, /*StopAtFirstRefCountedObj=*/true,
         [&](const clang::CXXRecordDecl *Record) { return isSafePtr(Record); },
         [&](const clang::QualType T) { return isSafePtrType(T); },
+        [&](const clang::Decl *D) { return isSafeDecl(D); },
         [&](const clang::Expr *ArgOrigin, bool IsSafe) {
           if (IsSafe)
             return true;
@@ -479,6 +481,11 @@ public:
            isa<ObjCMessageExpr>(E);
   }
 
+  bool isSafeDecl(const Decl *D) const final {
+    // Treat NS/CF globals in system header as immortal.
+    return BR->getSourceManager().isInSystemHeader(D->getLocation());
+  }
+
   const char *ptrKind() const final { return "unretained"; }
 };
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp
index dd9701f..c13df479 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLocalVarsChecker.cpp
@@ -166,10 +166,10 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded,
 class RawPtrRefLocalVarsChecker
     : public Checker<check::ASTDecl<TranslationUnitDecl>> {
   BugType Bug;
-  mutable BugReporter *BR;
   EnsureFunctionAnalysis EFA;
 
 protected:
+  mutable BugReporter *BR;
   mutable std::optional<RetainTypeChecker> RTC;
 
 public:
@@ -180,6 +180,7 @@ public:
   virtual bool isSafePtr(const CXXRecordDecl *) const = 0;
   virtual bool isSafePtrType(const QualType) const = 0;
   virtual bool isSafeExpr(const Expr *) const { return false; }
+  virtual bool isSafeDecl(const Decl *) const { return false; }
   virtual const char *ptrKind() const = 0;
 
   void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
@@ -288,6 +289,7 @@ public:
                 return isSafePtr(Record);
               },
               [&](const clang::QualType Type) { return isSafePtrType(Type); },
+              [&](const clang::Decl *D) { return isSafeDecl(D); },
               [&](const clang::Expr *InitArgOrigin, bool IsSafe) {
                 if (!InitArgOrigin || IsSafe)
                   return true;
@@ -443,6 +445,10 @@ public:
     return ento::cocoa::isCocoaObjectRef(E->getType()) &&
            isa<ObjCMessageExpr>(E);
   }
+  bool isSafeDecl(const Decl *D) const final {
+    // Treat NS/CF globals in system header as immortal.
+    return BR->getSourceManager().isInSystemHeader(D->getLocation());
+  }
   const char *ptrKind() const final { return "unretained"; }
 };
 
diff --git a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
index 1e44de8..d55b3ab 100644
--- a/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
+++ b/clang/test/Analysis/Checkers/WebKit/mock-system-header.h
@@ -34,6 +34,8 @@ void os_log_msg(os_log_t oslog, os_log_type_t type, const char *msg, ...);
 
 typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStringRef;
 
+extern CFStringRef const kCFURLTagNamesKey;
+
 #ifdef __OBJC__
 @class NSString;
 @interface SystemObject {
@@ -41,4 +43,8 @@ typedef const struct __attribute__((objc_bridge(NSString))) __CFString * CFStrin
   CFStringRef cf_string;
 }
 @end
+
+typedef NSString *NSNotificationName;
+extern "C" NSNotificationName NSApplicationDidBecomeActiveNotification;
+
 #endif
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
index a517dbc..5dc3b38 100644
--- a/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-call-args.mm
@@ -567,6 +567,17 @@ struct Derived : Base {
 
 } // namespace ns_retained_return_value
 
+namespace autoreleased {
+
+NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased));
+void consume(NSString *);
+
+void foo() {
+  consume(provideAutoreleased());
+}
+
+} // autoreleased
+
 @interface TestObject : NSObject
 - (void)doWork:(NSString *)msg, ...;
 - (void)doWorkOnSelf;
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm
index 307a4d03..f49e7bd 100644
--- a/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-local-vars.mm
@@ -1,8 +1,11 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedLocalVarsChecker -verify %s
 
 #import "objc-mock-types.h"
+#import "mock-system-header.h"
 
 void someFunction();
+extern "C" CFStringRef LocalGlobalCFString;
+extern "C" NSString *LocalGlobalNSString;
 
 namespace raw_ptr {
 void foo() {
@@ -535,6 +538,41 @@ unsigned foo() {
 
 } // namespace ns_retained_return_value
 
+namespace autoreleased {
+
+NSString *provideAutoreleased() __attribute__((ns_returns_autoreleased));
+void consume(NSString *);
+
+void foo() {
+  auto *string = provideAutoreleased();
+  consume(string);
+}
+
+} // autoreleased
+
+namespace ns_global {
+
+void consumeCFString(CFStringRef);
+void consumeNSString(NSString *);
+
+void cf() {
+  auto *str = kCFURLTagNamesKey;
+  consumeCFString(str);
+  auto *localStr = LocalGlobalCFString;
+  // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}}
+  consumeCFString(localStr);
+}
+
+void ns() {
+  auto *str = NSApplicationDidBecomeActiveNotification;
+  consumeNSString(str);
+  auto *localStr = LocalGlobalNSString;
+  // expected-warning@-1{{Local variable 'localStr' is unretained and unsafe [alpha.webkit.UnretainedLocalVarsChecker]}}
+  consumeNSString(localStr);
+}
+
+}
+
 bool doMoreWorkOpaque(OtherObj*);
 SomeObj* provide();
 
diff --git a/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm
new file mode 100644
index 0000000..5c78b21
--- /dev/null
+++ b/clang/test/Analysis/Checkers/WebKit/unretained-obj-arg.mm
@@ -0,0 +1,18 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UnretainedCallArgsChecker -verify %s
+
+#import "mock-types.h"
+#import "mock-system-header.h"
+
+void consumeCFString(CFStringRef);
+extern "C" CFStringRef LocalGlobalCFString;
+void consumeNSString(NSString *);
+extern "C" NSString *LocalGlobalNSString;
+
+void foo() {
+  consumeCFString(kCFURLTagNamesKey);
+  consumeCFString(LocalGlobalCFString);
+    // expected-warning@-1{{Call argument is unretained and unsafe}}
+  consumeNSString(NSApplicationDidBecomeActiveNotification);
+  consumeNSString(LocalGlobalNSString);
+    // expected-warning@-1{{Call argument is unretained and unsafe}}
+}
diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c
index 3748375..113ecf7 100644
--- a/clang/test/C/C23/n3037.c
+++ b/clang/test/C/C23/n3037.c
@@ -30,11 +30,24 @@ void func2(PRODUCT(int, SUM(float, double)) y) { // c17-warning {{declaration of
 
 struct foop { struct { int x; }; }; // c17-note {{previous definition is here}}
 struct foop { struct { int x; }; }; // c17-error {{redefinition of 'foop'}}
+// Test the field lookup compatibility isn't sufficient, the structure of types should be compatible.
+struct AnonymousStructNotMatchingFields { // c17-note {{previous definition is here}}
+  struct { // c23-note {{field has name '' here}}
+    int x;
+  };
+};
+struct AnonymousStructNotMatchingFields { // c23-error {{type 'struct AnonymousStructNotMatchingFields' has incompatible definitions}} \
+                                             c17-error {{redefinition of 'AnonymousStructNotMatchingFields'}}
+  int x; // c23-note {{field has name 'x' here}}
+};
+
 union barp { int x; float y; };     // c17-note {{previous definition is here}}
 union barp { int x; float y; };     // c17-error {{redefinition of 'barp'}}
 typedef struct q { int x; } q_t;    // c17-note 2 {{previous definition is here}}
 typedef struct q { int x; } q_t;    // c17-error {{redefinition of 'q'}} \
                                        c17-error-re {{typedef redefinition with different types ('struct (unnamed struct at {{.*}})' vs 'struct q')}}
+typedef struct { int x; } untagged_q_t; // both-note {{previous definition is here}}
+typedef struct { int x; } untagged_q_t; // both-error {{typedef redefinition with different types}}
 void func3(void) {
   struct S { int x; };       // c17-note {{previous definition is here}}
   struct T { struct S s; };  // c17-note {{previous definition is here}}
@@ -389,13 +402,40 @@ void nontag_both_in_params(struct { int i; } Arg1, struct { int i; } Arg2) {
   _Static_assert(0 == _Generic(__typeof__(Arg1), __typeof__(Arg2) : 1, default : 0)); // both-warning {{passing a type argument as the first operand to '_Generic' is a C2y extension}}
 }
 
-struct InnerAnonStruct {
+struct InnerUnnamedStruct {
   struct {
     int i;
   } untagged;
-} inner_anon_tagged;
+} inner_unnamed_tagged;
+_Static_assert(0 == _Generic(inner_unnamed_tagged.untagged, struct { int i; } : 1, default : 0));
 
-_Static_assert(0 == _Generic(inner_anon_tagged.untagged, struct { int i; } : 1, default : 0));
+struct InnerUnnamedStruct_same {
+  struct {
+    int i;
+  } untagged;
+};
+struct InnerUnnamedStruct_differentNaming {
+  struct {
+    int i;
+  } untaggedDifferent;
+};
+struct InnerUnnamedStruct_differentShape {
+  float x;
+  struct {
+    int i;
+  } untagged;
+  int y;
+};
+void compare_unnamed_struct_from_different_outer_type(
+    struct InnerUnnamedStruct sameOuterType,
+    struct InnerUnnamedStruct_same matchingType,
+    struct InnerUnnamedStruct_differentNaming differentFieldName,
+    struct InnerUnnamedStruct_differentShape differentType) {
+  inner_unnamed_tagged.untagged = sameOuterType.untagged;
+  inner_unnamed_tagged.untagged = matchingType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}}
+  inner_unnamed_tagged.untagged = differentFieldName.untaggedDifferent; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}}
+  inner_unnamed_tagged.untagged = differentType.untagged; // both-error-re {{assigning to 'struct (unnamed struct at {{.*}})' from incompatible type 'struct (unnamed struct at {{.*}})'}}
+}
 
 // Test the same thing with enumerations (test for unions is omitted because
 // unions and structures are both RecordDecl objects, whereas EnumDecl is not).
diff --git a/clang/test/CIR/CodeGen/goto.cpp b/clang/test/CIR/CodeGen/goto.cpp
index 48cb44e..257c255 100644
--- a/clang/test/CIR/CodeGen/goto.cpp
+++ b/clang/test/CIR/CodeGen/goto.cpp
@@ -205,6 +205,8 @@ extern "C" void case_follow_label(int v) {
 // CIR: cir.func dso_local @case_follow_label
 // CIR: cir.switch
 // CIR: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR:   cir.br ^bb1
+// CIR: ^bb1:
 // CIR:   cir.label "label"
 // CIR: cir.case(equal, [#cir.int<2> : !s32i]) {
 // CIR:   cir.call @action1()
@@ -215,9 +217,11 @@ extern "C" void case_follow_label(int v) {
 
 // LLVM: define dso_local void @case_follow_label
 // LLVM:  switch i32 {{.*}}, label %[[SWDEFAULT:.*]] [
-// LLVM:    i32 1, label %[[LABEL:.*]]
+// LLVM:    i32 1, label %[[CASE1:.*]]
 // LLVM:    i32 2, label %[[CASE2:.*]]
 // LLVM:  ]
+// LLVM: [[CASE1]]:
+// LLVM:   br label %[[LABEL:.*]]
 // LLVM: [[LABEL]]:
 // LLVM:   br label %[[CASE2]]
 // LLVM: [[CASE2]]:
@@ -303,3 +307,24 @@ extern "C" void default_follow_label(int v) {
 // OGCG:   br label %label
 // OGCG: sw.epilog:
 // OGCG:   ret void
+
+void g3() {
+label:
+  goto label;
+}
+
+// CIR:  cir.func dso_local @_Z2g3v
+// CIR:    cir.br ^bb1
+// CIR:  ^bb1:
+// CIR:    cir.label "label"
+// CIR:    cir.goto "label"
+
+// LLVM: define dso_local void @_Z2g3v()
+// LLVM:   br label %1
+// LLVM: 1:
+// LLVM:   br label %1
+
+// OGCG: define dso_local void @_Z2g3v()
+// OGCG:   br label %label
+// OGCG: label:
+// OGCG:   br label %label
diff --git a/clang/test/CIR/CodeGen/label.c b/clang/test/CIR/CodeGen/label.c
index a050094..f5345ef 100644
--- a/clang/test/CIR/CodeGen/label.c
+++ b/clang/test/CIR/CodeGen/label.c
@@ -11,10 +11,14 @@ labelA:
 }
 
 // CIR:  cir.func no_proto dso_local @label
+// CIR:     cir.br ^bb1
+// CIR:  ^bb1:
 // CIR:    cir.label "labelA"
 // CIR:    cir.return
 
 // LLVM:define dso_local void @label
+// LLVM:   br label %1
+// LLVM: 1:
 // LLVM:  ret void
 
 // OGCG: define dso_local void @label
@@ -29,15 +33,19 @@ labelC:
 }
 
 // CIR:  cir.func no_proto dso_local @multiple_labels
-// CIR:    cir.label "labelB"
 // CIR:    cir.br ^bb1
-// CIR:  ^bb1:  // pred: ^bb0
+// CIR:  ^bb1:
+// CIR:    cir.label "labelB"
+// CIR:    cir.br ^bb2
+// CIR:  ^bb2:
 // CIR:    cir.label "labelC"
 // CIR:    cir.return
 
 // LLVM: define dso_local void @multiple_labels()
 // LLVM:   br label %1
 // LLVM: 1:
+// LLVM:   br label %2
+// LLVM: 2:
 // LLVM:   ret void
 
 // OGCG: define dso_local void @multiple_labels
@@ -56,6 +64,8 @@ labelD:
 
 // CIR:  cir.func dso_local @label_in_if
 // CIR:      cir.if {{.*}} {
+// CIR:        cir.br ^bb1
+// CIR:      ^bb1:
 // CIR:        cir.label "labelD"
 // CIR:        [[LOAD:%.*]] = cir.load align(4) [[COND:%.*]] : !cir.ptr<!s32i>, !s32i
 // CIR:        [[INC:%.*]] = cir.unary(inc, %3) nsw : !s32i, !s32i
@@ -68,15 +78,17 @@ labelD:
 // LLVM: 3:
 // LLVM:   [[LOAD:%.*]] = load i32, ptr [[COND:%.*]], align 4
 // LLVM:   [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0
-// LLVM:   br i1 [[CMP]], label %6, label %9
+// LLVM:   br i1 [[CMP]], label %6, label %10
 // LLVM: 6:
+// LLVM:   br label %7
+// LLVM: 7:
 // LLVM:   [[LOAD2:%.*]] = load i32, ptr [[COND]], align 4
 // LLVM:   [[ADD1:%.*]] = add nsw i32 [[LOAD2]], 1
 // LLVM:   store i32 [[ADD1]], ptr [[COND]], align 4
-// LLVM:   br label %9
-// LLVM: 9:
 // LLVM:   br label %10
 // LLVM: 10:
+// LLVM:   br label %11
+// LLVM: 11:
 // LLVM:  ret void
 
 // OGCG: define dso_local void @label_in_if
@@ -142,11 +154,15 @@ end:
   return;
 }
 // CIR:  cir.func no_proto dso_local @labelWithoutMatch
+// CIR:    cir.br ^bb1
+// CIR:  ^bb1:
 // CIR:    cir.label "end"
 // CIR:    cir.return
 // CIR:  }
 
 // LLVM: define dso_local void @labelWithoutMatch
+// LLVM:   br label %1
+// LLVM: 1:
 // LLVM:   ret void
 
 // OGCG: define dso_local void @labelWithoutMatch
@@ -167,13 +183,17 @@ void foo() {
 
 // CIR: cir.func no_proto dso_local @foo
 // CIR:   cir.scope {
-// CIR:     cir.label "label"
 // CIR:     %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["agg.tmp0"]
+// CIR:      cir.br ^bb1
+// CIR:    ^bb1:
+// CIR:     cir.label "label"
 
 // LLVM:define dso_local void @foo() {
 // LLVM:  [[ALLOC:%.*]] = alloca %struct.S, i64 1, align 1
 // LLVM:  br label %2
 // LLVM:2:
+// LLVM:  br label %3
+// LLVM:3:
 // LLVM:  [[CALL:%.*]] = call %struct.S @get()
 // LLVM:  store %struct.S [[CALL]], ptr [[ALLOC]], align 1
 // LLVM:  [[LOAD:%.*]] = load %struct.S, ptr [[ALLOC]], align 1
diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl
deleted file mode 100644
index f48521b..0000000
--- a/clang/test/CodeGenHLSL/resources/RWBuffer-elementtype.hlsl
+++ /dev/null
@@ -1,70 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL
-// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPIRV
-
-// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", i16, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i16, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.2" = type { target("dx.TypedBuffer", i32, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.3" = type { target("dx.TypedBuffer", i64, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.4" = type { target("dx.TypedBuffer", i64, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.5" = type { target("dx.TypedBuffer", half, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.6" = type { target("dx.TypedBuffer", float, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.7" = type { target("dx.TypedBuffer", double, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.8" = type { target("dx.TypedBuffer", <4 x i16>, 1, 0, 1) }
-// DXIL: %"class.hlsl::RWBuffer.9" = type { target("dx.TypedBuffer", <3 x i32>, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.10" = type { target("dx.TypedBuffer", <2 x half>, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.11" = type { target("dx.TypedBuffer", <3 x float>, 1, 0, 0) }
-// DXIL: %"class.hlsl::RWBuffer.12" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 1) }
-
-// SPIRV: %"class.hlsl::RWBuffer" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) }
-// SPIRV: %"class.hlsl::RWBuffer.2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) }
-// SPIRV: %"class.hlsl::RWBuffer.3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) }
-// SPIRV: %"class.hlsl::RWBuffer.4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) }
-// SPIRV: %"class.hlsl::RWBuffer.5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) }
-// SPIRV: %"class.hlsl::RWBuffer.7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) }
-// SPIRV: %"class.hlsl::RWBuffer.12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) }
-
-RWBuffer<int16_t> BufI16;
-RWBuffer<uint16_t> BufU16;
-RWBuffer<int> BufI32;
-RWBuffer<uint> BufU32;
-RWBuffer<int64_t> BufI64;
-RWBuffer<uint64_t> BufU64;
-RWBuffer<half> BufF16;
-RWBuffer<float> BufF32;
-RWBuffer<double> BufF64;
-RWBuffer< vector<int16_t, 4> > BufI16x4;
-RWBuffer< vector<uint, 3> > BufU32x3;
-RWBuffer<half2> BufF16x2;
-RWBuffer<float3> BufF32x3;
-RWBuffer<int4> BufI32x4;
-// TODO: RWBuffer<snorm half> BufSNormF16; -> 11
-// TODO: RWBuffer<unorm half> BufUNormF16; -> 12
-// TODO: RWBuffer<snorm float> BufSNormF32; -> 13
-// TODO: RWBuffer<unorm float> BufUNormF32; -> 14
-// TODO: RWBuffer<snorm double> BufSNormF64; -> 15
-// TODO: RWBuffer<unorm double> BufUNormF64; -> 16
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
-  BufI16[GI] = 0;
-  BufU16[GI] = 0;
-  BufI32[GI] = 0;
-  BufU32[GI] = 0;
-  BufI64[GI] = 0;
-  BufU64[GI] = 0;
-  BufF16[GI] = 0;
-  BufF32[GI] = 0;
-  BufF64[GI] = 0;
-  BufI16x4[GI] = 0;
-  BufU32x3[GI] = 0;
-  BufF16x2[GI] = 0;
-  BufF32x3[GI] = 0;
-}
diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl
deleted file mode 100644
index 0de171c..0000000
--- a/clang/test/CodeGenHLSL/resources/RWBuffer-subscript.hlsl
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXC,CHECK
-// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK
-
-RWBuffer<int> In;
-RWBuffer<int> Out;
-
-[numthreads(1,1,1)]
-void main(unsigned GI : SV_GroupIndex) {
-  // CHECK: define void @main()
-
-  // DXC: %[[INPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
-  // SPIRV: %[[INPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
-  // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
-  // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
-  // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
-  // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
-  Out[GI] = In[GI];
-
-  // DXC: %[[INPTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
-  // SPIRV: %[[INPTR:.*]] = call ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
-  // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
-  // DXC: %[[OUTPTR:.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
-  // SPIRV: %[[OUTPTR:.*]] = call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
-  // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
-  Out[GI] = In.Load(GI);
-}
diff --git a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl
index ca33c42..1ec9f0f 100644
--- a/clang/test/CodeGenHLSL/resources/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-constructor.hlsl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | \
-// RUN:   llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+// RUN: llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
 // FIXME: SPIR-V codegen of llvm.spv.resource.handlefrombinding and resource types is not yet implemented
 // RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | \
 //        llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
@@ -14,7 +14,7 @@
 RWBuffer<float> Buf1 : register(u5, space3);
 
 // Resource with implicit binding
-RWBuffer<double> Buf2;
+Buffer<double> Buf2;
 
 export void foo() {
     // Local resource declaration
@@ -22,12 +22,12 @@ export void foo() {
 }
 
 // CHECK: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", float, 1, 0, 0) }
-// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", double, 1, 0, 0) }
-// CHECK: %"class.hlsl::RWBuffer.1" = type { target("dx.TypedBuffer", i32, 1, 0, 1) }
+// CHECK: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", double, 0, 0, 0) }
+// CHECK: %"class.hlsl::RWBuffer.0" = type { target("dx.TypedBuffer", i32, 1, 0, 1) }
 
 // CHECK: @Buf1 = internal global %"class.hlsl::RWBuffer" poison, align 4
 // CHECK: @[[Buf1Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf1\00", align 1
-// CHECK: @Buf2 = internal global %"class.hlsl::RWBuffer.0" poison, align 4
+// CHECK: @Buf2 = internal global %"class.hlsl::Buffer" poison, align 4
 // CHECK: @[[Buf2Str:.*]] = private unnamed_addr constant [5 x i8] c"Buf2\00", align 1
 
 // Buf1 initialization part 1 - global init function that calls RWBuffer<float>::__createFromBinding
@@ -50,24 +50,24 @@ export void foo() {
 // Buf2 initialization part 1 - global init function that RWBuffer<float>::__createFromImplicitBinding
 // CHECK: define internal void @__cxx_global_var_init.1()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: call void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+// CHECK-NEXT: call void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
 // CHECK-SAME: (ptr {{.*}} @Buf2, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 0, ptr noundef @[[Buf2Str]])
 
-// Buf2 initialization part 2 - body of RWBuffer<float>::__createFromImplicitBinding call
-// CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
-// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::RWBuffer.0") align 4 %[[RetValue2:.*]], i32 noundef %orderId, 
+// Buf2 initialization part 2 - body of Buffer<double>::__createFromImplicitBinding call
+// CHECK: define linkonce_odr hidden void @hlsl::Buffer<double>::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*)
+// CHECK-SAME: (ptr {{.*}} sret(%"class.hlsl::Buffer") align 4 %[[RetValue2:.*]], i32 noundef %orderId, 
 // CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, ptr noundef %name)
-// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::RWBuffer.0", align 4
-// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 1, 0, 0)
-// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_1_0_0t(
-// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %[[Tmp2]], i32 0, i32 0
-// CHECK-DXIL: store target("dx.TypedBuffer", double, 1, 0, 0) %[[Handle2]], ptr %__handle, align 4
-// CHECK: call void @hlsl::RWBuffer<double>::RWBuffer(hlsl::RWBuffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]])
+// CHECK: %[[Tmp2:.*]] = alloca %"class.hlsl::Buffer", align 4
+// CHECK: %[[Handle2:.*]] = call target("dx.TypedBuffer", double, 0, 0, 0)
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_0_0_0t(
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %[[Tmp2]], i32 0, i32 0
+// CHECK-DXIL: store target("dx.TypedBuffer", double, 0, 0, 0) %[[Handle2]], ptr %__handle, align 4
+// CHECK: call void @hlsl::Buffer<double>::Buffer(hlsl::Buffer<double> const&)(ptr {{.*}} %[[RetValue2]], ptr {{.*}} %[[Tmp2]])
 
 // Buf3 initialization part 1 - local variable declared in function foo() is initialized by RWBuffer<int> C1 default constructor
 // CHECK: define void @foo()
 // CHECK-NEXT: entry:
-// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.1", align 4
+// CHECK-NEXT: %Buf3 = alloca %"class.hlsl::RWBuffer.0", align 4
 // CHECK-NEXT: call void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %Buf3)
 
 // Buf3 initialization part 2 - body of RWBuffer<int> default C1 constructor that calls the default C2 constructor
@@ -76,11 +76,11 @@ export void foo() {
 
 // Buf3 initialization part 3 - body of RWBuffer<int> default C2 constructor that initializes handle to poison
 // CHECK: define linkonce_odr hidden void @hlsl::RWBuffer<int>::RWBuffer()(ptr {{.*}} %this)
-// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.1", ptr %{{.*}}, i32 0, i32 0
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %{{.*}}, i32 0, i32 0
 // CHECK-NEXT: store target("dx.TypedBuffer", i32, 1, 0, 1) poison, ptr %__handle, align 4
 
 // Module initialization
-// CHECK: define internal void @_GLOBAL__sub_I_RWBuffer_constructor.hlsl()
+// CHECK: define internal void @_GLOBAL__sub_I_TypedBuffers_constructor.hlsl()
 // CHECK-NEXT: entry:
 // CHECK-NEXT: call void @__cxx_global_var_init()
 // CHECK-NEXT: call void @__cxx_global_var_init.1()
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl
new file mode 100644
index 0000000..d3dba8a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-elementtype.hlsl
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=0 -check-prefixes=DXIL
+
+// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=Buffer %s | FileCheck %s -DRESOURCE=Buffer -DRW=1 -check-prefixes=SPV-RO
+
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer -DRW=1 -check-prefixes=DXIL
+
+// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=RWBuffer %s | FileCheck %s -DRESOURCE=RWBuffer --DRW=2 -check-prefixes=SPV-RW
+
+// DXIL: %"class.hlsl::[[RESOURCE]]" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.TypedBuffer", i16, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.TypedBuffer", i32, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.TypedBuffer", i64, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.TypedBuffer", half, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.TypedBuffer", float, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.TypedBuffer", double, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.TypedBuffer", <4 x i16>, [[RW]], 0, 1) }
+// DXIL: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.TypedBuffer", <3 x i32>, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.TypedBuffer", <2 x half>, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.TypedBuffer", <3 x float>, [[RW]], 0, 0) }
+// DXIL: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.TypedBuffer", <4 x i32>, [[RW]], 0, 1) }
+
+// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 1, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) }
+
+// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.Image", i16, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 24) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.SignedImage", i64, 5, 2, 0, 0, 2, 41) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.Image", i64, 5, 2, 0, 0, 2, 40) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 3) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.Image", double, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.SignedImage", i16, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.Image", i32, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.Image", half, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.Image", float, 5, 2, 0, 0, 2, 0) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 21) }
+
+RESOURCE<int16_t> BufI16;
+RESOURCE<uint16_t> BufU16;
+RESOURCE<int> BufI32;
+RESOURCE<uint> BufU32;
+RESOURCE<int64_t> BufI64;
+RESOURCE<uint64_t> BufU64;
+RESOURCE<half> BufF16;
+RESOURCE<float> BufF32;
+RESOURCE<double> BufF64;
+RESOURCE< vector<int16_t, 4> > BufI16x4;
+RESOURCE< vector<uint, 3> > BufU32x3;
+RESOURCE<half2> BufF16x2;
+RESOURCE<float3> BufF32x3;
+RESOURCE<int4> BufI32x4;
+// TODO: RESOURCE<snorm half> BufSNormF16; -> 11
+// TODO: RESOURCE<unorm half> BufUNormF16; -> 12
+// TODO: RESOURCE<snorm float> BufSNormF32; -> 13
+// TODO: RESOURCE<unorm float> BufUNormF32; -> 14
+// TODO: RESOURCE<snorm double> BufSNormF64; -> 15
+// TODO: RESOURCE<unorm double> BufUNormF64; -> 16
+
+[numthreads(1,1,1)]
+void main(int GI : SV_GroupIndex) {
+  int16_t v1 = BufI16[GI];
+  uint16_t v2 = BufU16[GI];
+  int v3 = BufI32[GI];
+  uint v4 = BufU32[GI];
+  int64_t v5 = BufI64[GI];
+  uint64_t v6 = BufU64[GI];
+  half v7 = BufF16[GI];
+  float v8 = BufF32[GI];
+  double v9 = BufF64[GI];
+  vector<int16_t,4> v10 = BufI16x4[GI];
+  vector<int, 3> v11 = BufU32x3[GI];
+  half2 v12 = BufF16x2[GI];
+  float3 v13 = BufF32x3[GI];
+}
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
new file mode 100644
index 0000000..b153bda
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-methods.hlsl
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL
+// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -finclude-default-header -emit-llvm -disable-llvm-passes -o - %s | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPIRV
+
+// NOTE: SPIRV codegen for resource methods is not yet implemented
+
+Buffer<float> Buf : register(t0);
+RWBuffer<uint4> RWBuf : register(u0);
+
+// DXIL: %"class.hlsl::Buffer" = type { target("dx.TypedBuffer", float, 0, 0, 0) }
+// DXIL: %"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) }
+
+// DXIL: @Buf = internal global %"class.hlsl::Buffer" poison
+// DXIL: @RWBuf = internal global %"class.hlsl::RWBuffer" poison
+
+export float TestLoad() {
+    return Buf.Load(1) + RWBuf.Load(2).y;
+}
+
+// CHECK: define noundef nofpclass(nan inf) float @TestLoad()()
+// CHECK: call {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} @Buf, i32 noundef 1)
+// CHECK: call {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} @RWBuf, i32 noundef 2)
+// CHECK: add
+// CHECK: ret float
+
+// CHECK: define {{.*}} float @hlsl::Buffer<float>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::Buffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", float, 0, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VAL:.*]] = load float, ptr %[[PTR]]
+// CHECK-NEXT: ret float %[[VAL]]
+
+// CHECK: define {{.*}} <4 x i32> @hlsl::RWBuffer<unsigned int vector[4]>::Load(unsigned int)(ptr {{.*}} %this, i32 noundef %Index)
+// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0
+// DXIL-NEXT: %[[HANDLE:.*]] = load target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), ptr %__handle
+// CHECK-NEXT: %[[INDEX:.*]] = load i32, ptr %Index.addr
+// DXIL-NEXT: %[[PTR:.*]] = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %[[HANDLE]], i32 %[[INDEX]])
+// CHECK-NEXT: %[[VEC:.*]] = load <4 x i32>, ptr %[[PTR]]
+// CHECK-NEXT: ret <4 x i32> %[[VEC]]
+
+// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_f32_0_0_0t(target("dx.TypedBuffer", float, 0, 0, 0), i32)
+// DXIL: declare ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4i32_1_0_0t(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), i32)
diff --git a/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl
new file mode 100644
index 0000000..adc35f6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/TypedBuffers-subscript.hlsl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=DXIL,CHECK
+// RUN: %clang_cc1 -triple spirv1.6-pc-vulkan1.3-compute -fspv-use-unknown-image-format -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=SPIRV,CHECK
+
+Buffer<int> In;
+RWBuffer<int> Out;
+
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+  // CHECK: define void @main()
+
+  // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_0_0_1t(target("dx.TypedBuffer", i32, 0, 0, 1) %{{.*}}, i32 %{{.*}})
+  // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_1_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 1, 0) %{{.*}}, i32 %{{.*}})
+  // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
+  // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
+  // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
+  // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
+  Out[GI] = In[GI];
+
+  // DXIL: %[[INPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
+  // SPIRV: %[[INPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
+  // CHECK: %[[LOAD:.*]] = load i32, ptr {{.*}}%[[INPTR]]
+  // DXIL: %[[OUTPTR:.*]] = call {{.*}} ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_i32_1_0_1t(target("dx.TypedBuffer", i32, 1, 0, 1) %{{.*}}, i32 %{{.*}})
+  // SPIRV: %[[OUTPTR:.*]] = call {{.*}} ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.SignedImage_i32_5_2_0_0_2_0t(target("spirv.SignedImage", i32, 5, 2, 0, 0, 2, 0) %{{.*}}, i32 %{{.*}})
+  // CHECK: store i32 %[[LOAD]], ptr {{.*}}%[[OUTPTR]]
+  Out[GI + 1] = Out[GI];
+}
diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 9f8e833..5931b60 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -83,6 +83,8 @@ mark_as_advanced(COMPILER_RT_BUILD_ORC)
 option(COMPILER_RT_BUILD_GWP_ASAN "Build GWP-ASan, and link it into SCUDO" ON)
 mark_as_advanced(COMPILER_RT_BUILD_GWP_ASAN)
 option(COMPILER_RT_ENABLE_CET "Build Compiler RT with CET enabled" OFF)
+option(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME "Build asan unit tests without depending upon a just-built asan runtime" OFF)
+mark_as_advanced(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME)
 
 option(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH "Set custom sysroot for building SCUDO standalone" OFF)
 mark_as_advanced(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH)
diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt
index 9cd9c97..6d88c96 100644
--- a/compiler-rt/lib/asan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/asan/tests/CMakeLists.txt
@@ -170,11 +170,21 @@ function(add_asan_tests arch test_runtime)
   set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
   set(CONFIG_NAME_DYNAMIC ${ARCH_UPPER_CASE}${OS_NAME}DynamicConfig)
 
+  # On some platforms, unit tests can be run against the runtime that shipped
+  # with the host compiler with COMPILER_RT_TEST_STANDALONE_BUILD_LIBS=OFF.
+  # COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME=ON removes the dependency
+  # on `asan`, allowing the tests to be run independently without
+  # a newly built asan runtime.
+  set(ASAN_UNIT_TEST_DEPS asan)
+  if(COMPILER_RT_ASAN_UNIT_TESTS_USE_HOST_RUNTIME)
+    set(ASAN_UNIT_TEST_DEPS)
+  endif()
+
   # Closure to keep the values.
   function(generate_asan_tests test_objects test_suite testname)
     generate_compiler_rt_tests(${test_objects} ${test_suite} ${testname} ${arch}
       COMPILE_DEPS ${ASAN_UNITTEST_HEADERS} ${ASAN_IGNORELIST_FILE}
-      DEPS asan
+      DEPS ${ASAN_UNIT_TEST_DEPS}
       KIND ${TEST_KIND}
       ${ARGN}
       )
@@ -215,7 +225,7 @@ function(add_asan_tests arch test_runtime)
       add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}"
         SUBDIR "${CONFIG_NAME_DYNAMIC}"
         OBJECTS ${ASAN_INST_TEST_OBJECTS}
-        DEPS asan ${ASAN_INST_TEST_OBJECTS}
+        DEPS ${ASAN_UNIT_TEST_DEPS} ${ASAN_INST_TEST_OBJECTS}
         LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS} ${DYNAMIC_LINK_FLAGS}
         )
     endif()
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 7fb58be..edb2699 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack);
 const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
 const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1;
 const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1;
+const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1;
 
 #define GET_MALLOC_STACK_TRACE                                             \
   UNINITIALIZED BufferedStackTrace stack;                                  \
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 64df863..80608aa 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -217,25 +217,52 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
   }
   auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
   meta->requested_size = size;
+  uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
+  void* padding_start = reinterpret_cast<char*>(allocated) + size;
+  uptr padding_size = actually_allocated_size - size;
+
+  // - With calloc(7,1), we can set the ideal tagging:
+  //     bytes 0-6:  initialized,   origin not set (and irrelevant)
+  //     byte  7:    uninitialized, origin TAG_ALLOC_PADDING
+  //     bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+  // - If we have malloc(7) and __msan_get_track_origins() > 1, the 4-byte
+  //   origin granularity only allows the slightly suboptimal tagging:
+  //     bytes 0-6:  uninitialized, origin TAG_ALLOC
+  //     byte  7:    uninitialized, origin TAG_ALLOC (suboptimal)
+  //     bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+  // - If we have malloc(7) and __msan_get_track_origins() == 1, we use a
+  //   single origin bean to reduce overhead:
+  //     bytes 0-6:  uninitialized, origin TAG_ALLOC
+  //     byte  7:    uninitialized, origin TAG_ALLOC (suboptimal)
+  //     bytes 8-15: uninitialized, origin TAG_ALLOC (suboptimal)
+  if (__msan_get_track_origins() && flags()->poison_in_malloc &&
+      (zero || (__msan_get_track_origins() > 1))) {
+    stack->tag = STACK_TRACE_TAG_ALLOC_PADDING;
+    Origin o2 = Origin::CreateHeapOrigin(stack);
+    __msan_set_origin(padding_start, padding_size, o2.raw_id());
+  }
+
   if (zero) {
     if (allocator.FromPrimary(allocated))
       __msan_clear_and_unpoison(allocated, size);
     else
       __msan_unpoison(allocated, size);  // Mem is already zeroed.
+
+    if (flags()->poison_in_malloc)
+      __msan_poison(padding_start, padding_size);
   } else if (flags()->poison_in_malloc) {
-    __msan_poison(allocated, size);
+    __msan_poison(allocated, actually_allocated_size);
+
     if (__msan_get_track_origins()) {
       stack->tag = StackTrace::TAG_ALLOC;
       Origin o = Origin::CreateHeapOrigin(stack);
-      __msan_set_origin(allocated, size, o.raw_id());
+      __msan_set_origin(
+          allocated,
+          __msan_get_track_origins() == 1 ? actually_allocated_size : size,
+          o.raw_id());
     }
   }
 
-  uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
-  // For compatibility, the allocator converted 0-sized allocations into 1 byte
-  if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc)
-    __msan_poison(allocated, 1);
-
   UnpoisonParam(2);
   RunMallocHooks(allocated, size);
   return allocated;
@@ -255,9 +282,10 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) {
   if (flags()->poison_in_free && allocator.FromPrimary(p)) {
     __msan_poison(p, size);
     if (__msan_get_track_origins()) {
+      uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(p);
       stack->tag = StackTrace::TAG_DEALLOC;
       Origin o = Origin::CreateHeapOrigin(stack);
-      __msan_set_origin(p, size, o.raw_id());
+      __msan_set_origin(p, actually_allocated_size, o.raw_id());
     }
   }
   if (MsanThread *t = GetCurrentThread()) {
diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp
index 99bf81f..cd0bf67 100644
--- a/compiler-rt/lib/msan/msan_report.cpp
+++ b/compiler-rt/lib/msan/msan_report.cpp
@@ -90,6 +90,10 @@ static void DescribeOrigin(u32 id) {
         Printf("  %sVirtual table ptr was destroyed%s\n", d.Origin(),
                d.Default());
         break;
+      case STACK_TRACE_TAG_ALLOC_PADDING:
+        Printf("  %sUninitialized value is outside of heap allocation%s\n",
+               d.Origin(), d.Default());
+        break;
       default:
         Printf("  %sUninitialized value was created%s\n", d.Origin(),
                d.Default());
diff --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp
new file mode 100644
index 0000000..72acf31
--- /dev/null
+++ b/compiler-rt/test/msan/allocator_padding.cpp
@@ -0,0 +1,94 @@
+// *** malloc: all bytes are uninitialized
+// * malloc byte 0
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 0 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// * malloc byte 6
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 6 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes
+// 7-15 are padding.
+//
+// * malloc byte 7
+// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING,
+//            ALLOC always takes precedence.
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// Bytes 8-15 are padding
+// For track-origins=1, ALLOC is used instead of ALLOC_PADDING.
+//
+// * malloc byte 8
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * malloc byte 15
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+// *** calloc
+// Bytes 0-6 are fully initialized, so no MSan report should happen.
+//
+// * calloc byte 0
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+//
+// * calloc byte 6
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+//
+// * calloc byte 7
+// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING
+// (since the origin does not need to track bytes 4-6).
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * calloc byte 8
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * calloc byte 15
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+#ifdef USE_CALLOC
+  char *p = (char *)calloc(7, 1);
+#else
+  char *p = (char *)malloc(7);
+#endif
+
+  if (argc == 2) {
+    int index = atoi(argv[1]);
+
+    printf("p[%d] = %d\n", index, p[index]);
+    // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
+    // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]]
+    // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation
+    // ORIGIN-ALLOC-PADDING: Uninitialized value is outside of heap allocation
+    free(p);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp
index 1451e1e..f4cf1d8 100644
--- a/compiler-rt/test/msan/zero_alloc.cpp
+++ b/compiler-rt/test/msan/zero_alloc.cpp
@@ -1,4 +1,9 @@
-// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \
+// RUN:     | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,DISCOUNT
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGINS
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -10,6 +15,7 @@ int main(int argc, char **argv) {
     printf("Content of p1 is: %d\n", *p1);
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
     // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+    // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation
     free(p1);
   }
 
@@ -19,6 +25,7 @@ int main(int argc, char **argv) {
     printf("Content of p2 is: %d\n", *p2);
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
     // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+    // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation
     free(p2);
   }
 
@@ -28,6 +35,8 @@ int main(int argc, char **argv) {
     printf("Content of p2 is: %d\n", *p3);
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
     // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+    // DISCOUNT: Uninitialized value was created by a heap allocation
+    // ORIGINS: Uninitialized value is outside of heap allocation
     free(p3);
   }
 
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 0afb295..70bb43a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
   llvm::LogicalResult
   matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
+
+    if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) {
+      auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+      replaceWithAddrOfOrASCast(
+          rewriter, addr->getLoc(),
+          global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+          getProgramAddressSpace(rewriter),
+          global ? global.getSymName()
+                 : addr.getSymbol().getRootReference().getValue(),
+          convertType(addr.getType()), addr);
+      return mlir::success();
+    }
+
     auto global = addr->getParentOfType<mlir::ModuleOp>()
                       .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
     replaceWithAddrOfOrASCast(
@@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
 
     if (global.getDataAttr() &&
         *global.getDataAttr() == cuf::DataAttribute::Constant)
-      TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation");
+      g.setAddrSpace(
+          static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant));
 
     rewriter.eraseOp(global);
     return mlir::success();
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 41383fb..9bf10b5 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -353,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) {
   // calculation op.
   mlir::Value baseRef =
       llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op)
+          .Case<fir::DeclareOp>([&](auto op) {
+            // If this declare binds a view with an underlying storage operand,
+            // treat that storage as the base reference. Otherwise, fall back
+            // to the declared memref.
+            if (auto storage = op.getStorage())
+              return storage;
+            return mlir::Value(varPtr);
+          })
           .Case<hlfir::DesignateOp>([&](auto op) {
             // Get the base object.
             return op.getMemref();
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index bbd3f9f..60cda9e 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
 // CHECK-LABEL: llvm.func @_QQxxx()
 // CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
 // CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor
+
+// -----
+
+module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+  gpu.module @cuda_device_mod {
+    fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 {
+      %0 = fir.zero_bits i32
+      fir.has_value %0 : i32
+    }
+    gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel {
+      %c-1 = arith.constant -1 : index
+      %c1_i32 = arith.constant 1 : i32
+      %0 = arith.constant 1 : i32
+      %1 = arith.addi %0, %c1_i32 : i32
+      %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32>
+      %4 = fir.load %2 : !fir.ref<i32>
+      %5 = fir.convert %1 : (i32) -> i64
+      %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32>
+      fir.store %4 to %6 : !fir.ref<i32>
+      gpu.return
+    }
+  }
+}
+
+// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32
+// CHECK-LABEL:  gpu.func @_QMkernelsPassign
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4>
+// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr
diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir
new file mode 100644
index 0000000..fabfe4c
--- /dev/null
+++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir
@@ -0,0 +1,24 @@
+// Use --mlir-disable-threading so that the diagnostic printing is serialized.
+// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
+
+module {
+  // Build a scalar view via fir.declare with a storage operand into an array of i8
+  func.func @_QPdeclare_with_storage_is_nonscalar() {
+    %c0 = arith.constant 0 : index
+    %arr = fir.alloca !fir.array<4xi8>
+    %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8>
+    %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32>
+    %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"}
+      : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32>
+    // Force interface query through an acc op that prints type category
+    %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false}
+    acc.enter_data dataOperands(%cp : !fir.ref<f32>)
+    return
+  }
+
+  // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false}
+  // CHECK: Pointer-like and Mappable: !fir.ref<f32>
+  // CHECK: Type category: array
+}
+
+
diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake
index c6f00ed..23d9d49 100644
--- a/lldb/cmake/modules/LLDBFramework.cmake
+++ b/lldb/cmake/modules/LLDBFramework.cmake
@@ -68,8 +68,6 @@ if(NOT APPLE_EMBEDDED)
   )
 endif()
 
-find_program(unifdef_EXECUTABLE unifdef)
-
 # Wrap output in a target, so lldb-framework can depend on it.
 add_custom_target(liblldb-resource-headers DEPENDS lldb-sbapi-dwarf-enums ${lldb_staged_headers})
 set_target_properties(liblldb-resource-headers PROPERTIES FOLDER "LLDB/Resources")
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index fec9fde..1a7db8f 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -130,6 +130,8 @@ FLAGS_ENUM(LaunchFlags){
     eLaunchFlagInheritTCCFromParent =
         (1u << 12), ///< Don't make the inferior responsible for its own TCC
                     ///< permissions but instead inherit them from its parent.
+    eLaunchFlagMemoryTagging =
+        (1u << 13), ///< Launch process with memory tagging explicitly enabled.
 };
 
 /// Thread Run Modes.
diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
index 53e991a..1a2860a 100644
--- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
+++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py
@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
 import ctypes
 import errno
 import io
@@ -5,6 +6,7 @@ import threading
 import socket
 import traceback
 from lldbsuite.support import seven
+from typing import Optional, List, Tuple
 
 
 def checksum(message):
@@ -86,7 +88,7 @@ class MockGDBServerResponder:
     handles any packet not recognized in the common packet handling code.
     """
 
-    registerCount = 40
+    registerCount: int = 40
 
     class RESPONSE_DISCONNECT:
         pass
@@ -95,7 +97,7 @@ class MockGDBServerResponder:
         pass
 
     def __init__(self):
-        self.packetLog = []
+        self.packetLog: List[str] = []
 
     def respond(self, packet):
         """
@@ -241,7 +243,7 @@ class MockGDBServerResponder:
     def qHostInfo(self):
         return "ptrsize:8;endian:little;"
 
-    def qEcho(self):
+    def qEcho(self, num: int):
         return "E04"
 
     def qQueryGDBServer(self):
@@ -262,10 +264,10 @@ class MockGDBServerResponder:
     def D(self, packet):
         return "OK"
 
-    def readRegisters(self):
+    def readRegisters(self) -> str:
         return "00000000" * self.registerCount
 
-    def readRegister(self, register):
+    def readRegister(self, register: int) -> str:
         return "00000000"
 
     def writeRegisters(self, registers_hex):
@@ -305,7 +307,9 @@ class MockGDBServerResponder:
         # SIGINT is 2, return type is 2 digit hex string
         return "S02"
 
-    def qXferRead(self, obj, annex, offset, length):
+    def qXferRead(
+        self, obj: str, annex: str, offset: int, length: int
+    ) -> Tuple[Optional[str], bool]:
         return None, False
 
     def _qXferResponse(self, data, has_more):
@@ -373,15 +377,17 @@ class MockGDBServerResponder:
         pass
 
 
-class ServerChannel:
+class ServerChannel(ABC):
     """
     A wrapper class for TCP or pty-based server.
     """
 
-    def get_connect_address(self):
+    @abstractmethod
+    def get_connect_address(self) -> str:
         """Get address for the client to connect to."""
 
-    def get_connect_url(self):
+    @abstractmethod
+    def get_connect_url(self) -> str:
         """Get URL suitable for process connect command."""
 
     def close_server(self):
@@ -393,10 +399,12 @@ class ServerChannel:
     def close_connection(self):
         """Close all resources used by the accepted connection."""
 
-    def recv(self):
+    @abstractmethod
+    def recv(self) -> bytes:
         """Receive a data packet from the connected client."""
 
-    def sendall(self, data):
+    @abstractmethod
+    def sendall(self, data: bytes) -> None:
         """Send the data to the connected client."""
 
 
@@ -427,11 +435,11 @@ class ServerSocket(ServerChannel):
         self._connection.close()
         self._connection = None
 
-    def recv(self):
+    def recv(self) -> bytes:
         assert self._connection is not None
         return self._connection.recv(4096)
 
-    def sendall(self, data):
+    def sendall(self, data: bytes) -> None:
         assert self._connection is not None
         return self._connection.sendall(data)
 
@@ -443,10 +451,10 @@ class TCPServerSocket(ServerSocket):
         )[0]
         super().__init__(family, type, proto, addr)
 
-    def get_connect_address(self):
+    def get_connect_address(self) -> str:
         return "[{}]:{}".format(*self._server_socket.getsockname())
 
-    def get_connect_url(self):
+    def get_connect_url(self) -> str:
         return "connect://" + self.get_connect_address()
 
 
@@ -454,10 +462,10 @@ class UnixServerSocket(ServerSocket):
     def __init__(self, addr):
         super().__init__(socket.AF_UNIX, socket.SOCK_STREAM, 0, addr)
 
-    def get_connect_address(self):
+    def get_connect_address(self) -> str:
         return self._server_socket.getsockname()
 
-    def get_connect_url(self):
+    def get_connect_url(self) -> str:
         return "unix-connect://" + self.get_connect_address()
 
 
@@ -471,7 +479,7 @@ class PtyServerSocket(ServerChannel):
         self._primary = io.FileIO(primary, "r+b")
         self._secondary = io.FileIO(secondary, "r+b")
 
-    def get_connect_address(self):
+    def get_connect_address(self) -> str:
         libc = ctypes.CDLL(None)
         libc.ptsname.argtypes = (ctypes.c_int,)
         libc.ptsname.restype = ctypes.c_char_p
@@ -484,7 +492,7 @@ class PtyServerSocket(ServerChannel):
         self._secondary.close()
         self._primary.close()
 
-    def recv(self):
+    def recv(self) -> bytes:
         try:
             return self._primary.read(4096)
         except OSError as e:
@@ -493,8 +501,8 @@ class PtyServerSocket(ServerChannel):
                 return b""
             raise
 
-    def sendall(self, data):
-        return self._primary.write(data)
+    def sendall(self, data: bytes) -> None:
+        self._primary.write(data)
 
 
 class MockGDBServer:
@@ -527,18 +535,21 @@ class MockGDBServer:
             self._thread.join()
             self._thread = None
 
-    def get_connect_address(self):
+    def get_connect_address(self) -> str:
+        assert self._socket is not None
         return self._socket.get_connect_address()
 
-    def get_connect_url(self):
+    def get_connect_url(self) -> str:
+        assert self._socket is not None
         return self._socket.get_connect_url()
 
     def run(self):
+        assert self._socket is not None
         # For testing purposes, we only need to worry about one client
         # connecting just one time.
         try:
             self._socket.accept()
-        except:
+        except Exception:
             traceback.print_exc()
             return
         self._shouldSendAck = True
@@ -553,7 +564,7 @@ class MockGDBServer:
                 self._receive(data)
         except self.TerminateConnectionException:
             pass
-        except Exception as e:
+        except Exception:
             print(
                 "An exception happened when receiving the response from the gdb server. Closing the client..."
             )
@@ -586,7 +597,9 @@ class MockGDBServer:
         Once a complete packet is found at the front of self._receivedData,
         its data is removed form self._receivedData.
         """
+        assert self._receivedData is not None
         data = self._receivedData
+        assert self._receivedDataOffset is not None
         i = self._receivedDataOffset
         data_len = len(data)
         if data_len == 0:
@@ -639,10 +652,13 @@ class MockGDBServer:
         self._receivedDataOffset = 0
         return packet
 
-    def _sendPacket(self, packet):
-        self._socket.sendall(seven.bitcast_to_bytes(frame_packet(packet)))
+    def _sendPacket(self, packet: str):
+        assert self._socket is not None
+        framed_packet = seven.bitcast_to_bytes(frame_packet(packet))
+        self._socket.sendall(framed_packet)
 
     def _handlePacket(self, packet):
+        assert self._socket is not None
         if packet is self.PACKET_ACK:
             # Ignore ACKs from the client. For the future, we can consider
             # adding validation code to make sure the client only sends ACKs
diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp
index 21d94d6..8ae20bd 100644
--- a/lldb/source/Commands/CommandOptionsProcessLaunch.cpp
+++ b/lldb/source/Commands/CommandOptionsProcessLaunch.cpp
@@ -127,6 +127,10 @@ Status CommandOptionsProcessLaunch::SetOptionValue(
     break;
   }
 
+  case 'M':
+    launch_info.GetFlags().Set(eLaunchFlagMemoryTagging);
+    break;
+
   case 'c':
     if (!option_arg.empty())
       launch_info.SetShell(FileSpec(option_arg));
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 595b3d0..a9f054e 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -1173,6 +1173,11 @@ let Command = "process launch" in {
         Arg<"Boolean">,
         Desc<"Set whether to shell expand arguments to the process when "
              "launching.">;
+  def process_launch_memory_tagging
+      : Option<"memory-tagging", "M">,
+        Desc<"Set whether to explicitly enable memory tagging when launching "
+             "the process.  Requires hardware support.  "
+             "(Only supported on Darwin.)">;
 }
 
 let Command = "process attach" in {
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index 3c1d117..7120892 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -1210,6 +1210,39 @@ static Status LaunchProcessPosixSpawn(const char *exe_path,
     }
   }
 
+  if (launch_info.GetFlags().Test(eLaunchFlagMemoryTagging)) {
+    // The following function configures the spawn attributes to launch the
+    // process with memory tagging explicitly enabled.  We look it up
+    // dynamically since it is only available on newer OS.  Does nothing on
+    // hardware which does not support MTE.
+    //
+    //   int posix_spawnattr_set_use_sec_transition_shims_np(
+    //       posix_spawnattr_t *attr, uint32_t flags);
+    //
+    using posix_spawnattr_set_use_sec_transition_shims_np_t =
+        int (*)(posix_spawnattr_t *attr, uint32_t flags);
+    auto posix_spawnattr_set_use_sec_transition_shims_np_fn =
+        (posix_spawnattr_set_use_sec_transition_shims_np_t)dlsym(
+            RTLD_DEFAULT, "posix_spawnattr_set_use_sec_transition_shims_np");
+    if (posix_spawnattr_set_use_sec_transition_shims_np_fn) {
+      error =
+          Status(posix_spawnattr_set_use_sec_transition_shims_np_fn(&attr, 0),
+                 eErrorTypePOSIX);
+      if (error.Fail()) {
+        LLDB_LOG(log,
+                 "error: {0}, "
+                 "posix_spawnattr_set_use_sec_transition_shims_np(&attr, 0)",
+                 error);
+        return error;
+      }
+    } else {
+      LLDB_LOG(log,
+               "error: posix_spawnattr_set_use_sec_transition_shims_np not "
+               "available",
+               error);
+    }
+  }
+
   // Don't set the binpref if a shell was provided. After all, that's only
   // going to affect what version of the shell is launched, not what fork of
   // the binary is launched.  We insert "arch --arch <ARCH> as part of the
diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp
index c0b1079..9b7df10 100644
--- a/lldb/source/Host/windows/MainLoopWindows.cpp
+++ b/lldb/source/Host/windows/MainLoopWindows.cpp
@@ -55,11 +55,7 @@ public:
     if (m_monitor_thread.joinable()) {
       m_stopped = true;
       SetEvent(m_ready);
-      // Keep trying to cancel ReadFile() until the thread exits.
-      do {
-        CancelIoEx(m_handle, /*lpOverlapped=*/NULL);
-      } while (WaitForSingleObject(m_monitor_thread.native_handle(), 1) ==
-               WAIT_TIMEOUT);
+      CancelIoEx(m_handle, /*lpOverlapped=*/NULL);
       m_monitor_thread.join();
     }
     CloseHandle(m_event);
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
index cc0c9e7..6d8f41a 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
@@ -14,6 +14,7 @@
 #include "lldb/Utility/LLDBLog.h"
 #include "lldb/Utility/Log.h"
 #include "lldb/lldb-enumerations.h"
+#include "llvm/ADT/Sequence.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -266,22 +267,47 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process,
   return true;
 }
 
-bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr,
-                                       lldb::addr_t relative_selector_base_addr,
-                                       bool is_small, bool has_direct_sel) {
-  size_t ptr_size = process->GetAddressByteSize();
-  size_t size = GetSize(process, is_small);
+llvm::SmallVector<ClassDescriptorV2::method_t, 0>
+ClassDescriptorV2::ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
+                               lldb::addr_t relative_selector_base_addr,
+                               bool is_small, bool has_direct_sel) const {
+  lldb_private::Process *process = m_runtime.GetProcess();
+  if (!process)
+    return {};
 
-  DataBufferHeap buffer(size, '\0');
-  Status error;
+  const size_t size = method_t::GetSize(process, is_small);
+  const size_t num_methods = addresses.size();
 
-  process->ReadMemory(addr, buffer.GetBytes(), size, error);
-  if (error.Fail()) {
-    return false;
+  llvm::SmallVector<uint8_t, 0> buffer(num_methods * size, 0);
+  llvm::DenseSet<uint32_t> failed_indices;
+
+  for (auto [idx, addr] : llvm::enumerate(addresses)) {
+    Status error;
+    process->ReadMemory(addr, buffer.data() + idx * size, size, error);
+    if (error.Fail())
+      failed_indices.insert(idx);
   }
 
-  DataExtractor extractor(buffer.GetBytes(), size, process->GetByteOrder(),
-                          ptr_size);
+  llvm::SmallVector<method_t, 0> methods;
+  methods.reserve(num_methods);
+  for (auto [idx, addr] : llvm::enumerate(addresses)) {
+    if (failed_indices.contains(idx))
+      continue;
+    DataExtractor extractor(buffer.data() + idx * size, size,
+                            process->GetByteOrder(),
+                            process->GetAddressByteSize());
+    methods.push_back(method_t());
+    methods.back().Read(extractor, process, addr, relative_selector_base_addr,
+                        is_small, has_direct_sel);
+  }
+
+  return methods;
+}
+
+bool ClassDescriptorV2::method_t::Read(DataExtractor &extractor,
+                                       Process *process, lldb::addr_t addr,
+                                       lldb::addr_t relative_selector_base_addr,
+                                       bool is_small, bool has_direct_sel) {
   lldb::offset_t cursor = 0;
 
   if (is_small) {
@@ -291,11 +317,11 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr,
 
     m_name_ptr = addr + nameref_offset;
 
+    Status error;
     if (!has_direct_sel) {
       // The SEL offset points to a SELRef. We need to dereference twice.
-      m_name_ptr = process->ReadUnsignedIntegerFromMemory(m_name_ptr, ptr_size,
-                                                          0, error);
-      if (!error.Success())
+      m_name_ptr = process->ReadPointerFromMemory(m_name_ptr, error);
+      if (error.Fail())
         return false;
     } else if (relative_selector_base_addr != LLDB_INVALID_ADDRESS) {
       m_name_ptr = relative_selector_base_addr + nameref_offset;
@@ -308,13 +334,13 @@ bool ClassDescriptorV2::method_t::Read(Process *process, lldb::addr_t addr,
     m_imp_ptr = extractor.GetAddress_unchecked(&cursor);
   }
 
+  Status error;
   process->ReadCStringFromMemory(m_name_ptr, m_name, error);
-  if (error.Fail()) {
+  if (error.Fail())
     return false;
-  }
 
   process->ReadCStringFromMemory(m_types_ptr, m_types, error);
-  return !error.Fail();
+  return error.Success();
 }
 
 bool ClassDescriptorV2::ivar_list_t::Read(Process *process, lldb::addr_t addr) {
@@ -447,17 +473,19 @@ ClassDescriptorV2::GetMethodList(Process *process,
 bool ClassDescriptorV2::ProcessMethodList(
     std::function<bool(const char *, const char *)> const &instance_method_func,
     ClassDescriptorV2::method_list_t &method_list) const {
-  lldb_private::Process *process = m_runtime.GetProcess();
-  auto method = std::make_unique<method_t>();
-  lldb::addr_t relative_selector_base_addr =
-      m_runtime.GetRelativeSelectorBaseAddr();
-  for (uint32_t i = 0, e = method_list.m_count; i < e; ++i) {
-    method->Read(process, method_list.m_first_ptr + (i * method_list.m_entsize),
-                 relative_selector_base_addr, method_list.m_is_small,
-                 method_list.m_has_direct_selector);
-    if (instance_method_func(method->m_name.c_str(), method->m_types.c_str()))
+  auto idx_to_method_addr = [&](uint32_t idx) {
+    return method_list.m_first_ptr + (idx * method_list.m_entsize);
+  };
+  llvm::SmallVector<addr_t> addresses = llvm::to_vector(llvm::map_range(
+      llvm::seq<uint32_t>(method_list.m_count), idx_to_method_addr));
+
+  llvm::SmallVector<method_t, 0> methods =
+      ReadMethods(addresses, m_runtime.GetRelativeSelectorBaseAddr(),
+                  method_list.m_is_small, method_list.m_has_direct_selector);
+
+  for (const auto &method : methods)
+    if (instance_method_func(method.m_name.c_str(), method.m_types.c_str()))
       break;
-  }
   return true;
 }
 
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
index 920a5eb..78b3311 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
@@ -172,11 +172,16 @@ private:
              + field_size; // IMP imp;
     }
 
-    bool Read(Process *process, lldb::addr_t addr,
+    bool Read(DataExtractor &extractor, Process *process, lldb::addr_t addr,
               lldb::addr_t relative_selector_base_addr, bool is_small,
               bool has_direct_sel);
   };
 
+  llvm::SmallVector<method_t, 0>
+  ReadMethods(llvm::ArrayRef<lldb::addr_t> addresses,
+              lldb::addr_t relative_selector_base_addr, bool is_small,
+              bool has_direct_sel) const;
+
   struct ivar_list_t {
     uint32_t m_entsize;
     uint32_t m_count;
diff --git a/lldb/test/API/macosx/mte/Makefile b/lldb/test/API/macosx/mte/Makefile
index cb20942..d614e0f 100644
--- a/lldb/test/API/macosx/mte/Makefile
+++ b/lldb/test/API/macosx/mte/Makefile
@@ -1,12 +1,15 @@
 C_SOURCES := main.c
 
-EXE := uaf_mte
+EXE := uaf
 
-all: uaf_mte sign
+binary-plain:    uaf
+binary-entitled: uaf sign
+
+all: binary-entitled
 
 include Makefile.rules
 
-sign: mte-entitlements.plist uaf_mte
+sign: mte-entitlements.plist uaf
 ifeq ($(OS),Darwin)
 	codesign -s - -f --entitlements $^
 endif
diff --git a/lldb/test/API/macosx/mte/TestDarwinMTE.py b/lldb/test/API/macosx/mte/TestDarwinMTE.py
index 489e24a..a70b4b4 100644
--- a/lldb/test/API/macosx/mte/TestDarwinMTE.py
+++ b/lldb/test/API/macosx/mte/TestDarwinMTE.py
@@ -7,13 +7,25 @@ from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
 import lldbsuite.test.cpu_feature as cpu_feature
 
-exe_name = "uaf_mte"  # Must match Makefile
+exe_name = "uaf"  # Must match Makefile
 
 
 class TestDarwinMTE(TestBase):
     NO_DEBUG_INFO_TESTCASE = True
 
     @skipUnlessFeature(cpu_feature.AArch64.MTE)
+    def test_process_launch_memory_tagging(self):
+        self.build(make_targets=["binary-plain"])
+        self.createTestTarget(self.getBuildArtifact(exe_name))
+
+        self.expect("process launch", substrs=["exited with status = 0"])
+
+        self.expect(
+            "process launch --memory-tagging",
+            substrs=["stopped", "stop reason = EXC_ARM_MTE_TAG_FAULT"],
+        )
+
+    @skipUnlessFeature(cpu_feature.AArch64.MTE)
     def test_tag_fault(self):
         self.build()
         exe = self.getBuildArtifact(exe_name)
diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
index f06e7ce..bb7f3be 100644
--- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -32,8 +32,11 @@ bool IsAnnotationOK(const GlobalVariable &GV);
 /// profile information and provides methods to operate on them.
 class StaticDataProfileInfo {
 public:
-  /// Accummulate the profile count of a constant that will be lowered to static
-  /// data sections.
+  /// A constant is tracked only if the following conditions are met.
+  ///   1) It has local (i.e., private or internal) linkage.
+  //    2) Its data kind is one of {.rodata, .data, .bss, .data.rel.ro}.
+  //    3) It's eligible for section prefix annotation. See `AnnotationKind`
+  //       above for ineligible reasons.
   DenseMap<const Constant *, uint64_t> ConstantProfileCounts;
 
   /// Keeps track of the constants that are seen at least once without profile
@@ -44,6 +47,22 @@ public:
   LLVM_ABI std::optional<uint64_t>
   getConstantProfileCount(const Constant *C) const;
 
+  /// Use signed enums for enum value comparison, and make 'LukewarmOrUnknown'
+  /// as 0 so any accidentally uninitialized value will default to unknown.
+  enum class StaticDataHotness : int8_t {
+    Cold = -1,
+    LukewarmOrUnknown = 0,
+    Hot = 1,
+  };
+
+  /// Return the hotness of the constant \p C based on its profile count \p
+  /// Count.
+  LLVM_ABI StaticDataHotness getConstantHotnessUsingProfileCount(
+      const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const;
+
+  /// Return the string representation of the hotness enum \p Hotness.
+  LLVM_ABI StringRef hotnessToStr(StaticDataHotness Hotness) const;
+
 public:
   StaticDataProfileInfo() = default;
 
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 77ce052..2c59a52 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -43,8 +43,14 @@ public:
   ///
   /// \param LDCS Flag to indicate whether we should load the call site
   /// information from DWARF `DW_TAG_call_site` entries
-  DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
-      : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
+  ///
+  /// \param MachO Flag to indicate if the object file is mach-o (Apple's
+  /// executable format). Apple has some compile unit attributes that look like
+  /// split DWARF, but they aren't and they can cause warnins to be emitted
+  /// about missing DWO files.
+  DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false,
+                   bool MachO = false)
+      : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS), IsMachO(MachO) {}
 
   /// Extract the DWARF from the supplied object file and convert it into the
   /// Gsym format in the GsymCreator object that is passed in. Returns an
@@ -97,6 +103,7 @@ private:
   DWARFContext &DICtx;
   GsymCreator &Gsym;
   bool LoadDwarfCallSites;
+  bool IsMachO;
 
   friend class DwarfTransformerTest;
 };
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h
index 596cc18..b0197f0 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/AllocationActions.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H
 #define LLVM_EXECUTIONENGINE_ORC_SHARED_ALLOCATIONACTIONS_H
 
-#include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
 #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
 #include "llvm/Support/Compiler.h"
@@ -54,9 +53,6 @@ inline size_t numDeallocActions(const AllocActions &AAs) {
       AAs, [](const AllocActionCallPair &P) { return !!P.Dealloc; });
 }
 
-using OnRunFinalizeActionsCompleteFn =
-    unique_function<void(Expected<std::vector<WrapperFunctionCall>>)>;
-
 /// Run finalize actions.
 ///
 /// If any finalize action fails then the corresponding dealloc actions will be
@@ -67,16 +63,13 @@ using OnRunFinalizeActionsCompleteFn =
 /// be returned. The dealloc actions should be run by calling
 /// runDeallocationActions. If this function succeeds then the AA argument will
 /// be cleared before the function returns.
-LLVM_ABI void runFinalizeActions(AllocActions &AAs,
-                                 OnRunFinalizeActionsCompleteFn OnComplete);
-
-using OnRunDeallocActionsComeleteFn = unique_function<void(Error)>;
+LLVM_ABI Expected<std::vector<WrapperFunctionCall>>
+runFinalizeActions(AllocActions &AAs);
 
 /// Run deallocation actions.
 /// Dealloc actions will be run in reverse order (from last element of DAs to
 /// first).
-LLVM_ABI void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs,
-                                OnRunDeallocActionsComeleteFn OnComplete);
+LLVM_ABI Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs);
 
 using SPSAllocActionCallPair =
     SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>;
diff --git a/llvm/include/llvm/TableGen/CodeGenHelpers.h b/llvm/include/llvm/TableGen/CodeGenHelpers.h
index 5b823db..ce91f62 100644
--- a/llvm/include/llvm/TableGen/CodeGenHelpers.h
+++ b/llvm/include/llvm/TableGen/CodeGenHelpers.h
@@ -34,6 +34,21 @@ private:
   raw_ostream &OS;
 };
 
+// Simple RAII helper for emitting header include guard (ifndef-define-endif).
+class IncludeGuardEmitter {
+public:
+  IncludeGuardEmitter(raw_ostream &OS, StringRef Name)
+      : Name(Name.str()), OS(OS) {
+    OS << "#ifndef " << Name << "\n"
+       << "#define " << Name << "\n\n";
+  }
+  ~IncludeGuardEmitter() { OS << "\n#endif // " << Name << "\n"; }
+
+private:
+  std::string Name;
+  raw_ostream &OS;
+};
+
 // Simple RAII helper for emitting namespace scope. Name can be a single
 // namespace (empty for anonymous namespace) or nested namespace.
 class NamespaceEmitter {
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4bafd3f..a64b93d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6417,8 +6417,18 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S,
   case scSequentialUMinExpr:
     return GetGCDMultiple(cast<SCEVNAryExpr>(S));
   case scUnknown: {
-    // ask ValueTracking for known bits
+    // Ask ValueTracking for known bits. SCEVUnknown only become available at
+    // the point their underlying IR instruction has been defined. If CtxI was
+    // not provided, use:
+    // * the first instruction in the entry block if it is an argument
+    // * the instruction itself otherwise.
     const SCEVUnknown *U = cast<SCEVUnknown>(S);
+    if (!CtxI) {
+      if (isa<Argument>(U->getValue()))
+        CtxI = &*F.getEntryBlock().begin();
+      else if (auto *I = dyn_cast<Instruction>(U->getValue()))
+        CtxI = I;
+    }
     unsigned Known =
         computeKnownBits(U->getValue(), getDataLayout(), &AC, CtxI, &DT)
             .countMinTrailingZeros();
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
index 1f751ee..e7f0b2c 100644
--- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -60,6 +60,36 @@ void StaticDataProfileInfo::addConstantProfileCount(
     OriginalCount = getInstrMaxCountValue();
 }
 
+StaticDataProfileInfo::StaticDataHotness
+StaticDataProfileInfo::getConstantHotnessUsingProfileCount(
+    const Constant *C, const ProfileSummaryInfo *PSI, uint64_t Count) const {
+  // The accummulated counter shows the constant is hot. Return enum 'hot'
+  // whether this variable is seen by unprofiled functions or not.
+  if (PSI->isHotCount(Count))
+    return StaticDataHotness::Hot;
+  // The constant is not hot, and seen by unprofiled functions. We don't want to
+  // assign it to unlikely sections, even if the counter says 'cold'. So return
+  // enum 'LukewarmOrUnknown'.
+  if (ConstantWithoutCounts.count(C))
+    return StaticDataHotness::LukewarmOrUnknown;
+  // The accummulated counter shows the constant is cold so return enum 'cold'.
+  if (PSI->isColdCount(Count))
+    return StaticDataHotness::Cold;
+
+  return StaticDataHotness::LukewarmOrUnknown;
+}
+
+StringRef StaticDataProfileInfo::hotnessToStr(StaticDataHotness Hotness) const {
+  switch (Hotness) {
+  case StaticDataHotness::Cold:
+    return "unlikely";
+  case StaticDataHotness::Hot:
+    return "hot";
+  default:
+    return "";
+  }
+}
+
 std::optional<uint64_t>
 StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
   auto I = ConstantProfileCounts.find(C);
@@ -70,23 +100,10 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
 
 StringRef StaticDataProfileInfo::getConstantSectionPrefix(
     const Constant *C, const ProfileSummaryInfo *PSI) const {
-  auto Count = getConstantProfileCount(C);
+  std::optional<uint64_t> Count = getConstantProfileCount(C);
   if (!Count)
     return "";
-  // The accummulated counter shows the constant is hot. Return 'hot' whether
-  // this variable is seen by unprofiled functions or not.
-  if (PSI->isHotCount(*Count))
-    return "hot";
-  // The constant is not hot, and seen by unprofiled functions. We don't want to
-  // assign it to unlikely sections, even if the counter says 'cold'. So return
-  // an empty prefix before checking whether the counter is cold.
-  if (ConstantWithoutCounts.count(C))
-    return "";
-  // The accummulated counter shows the constant is cold. Return 'unlikely'.
-  if (PSI->isColdCount(*Count))
-    return "unlikely";
-  // The counter says lukewarm. Return an empty prefix.
-  return "";
+  return hotnessToStr(getConstantHotnessUsingProfileCount(C, PSI, *Count));
 }
 
 bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 0e38017..d2f2c3e 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -661,7 +661,10 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
 void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
                                SmallVectorImpl<SlotIndex> *EndPoints) {
   LiveQueryResult LRQ = LR.Query(Kill);
-  VNInfo *VNI = LRQ.valueOutOrDead();
+  // LR may have liveness reachable from early clobber slot, which may be
+  // only live-in instead of live-out of the instruction.
+  // For example, LR =[1r, 3r), Kill = 3e, we have to prune [3e, 3r) of LR.
+  VNInfo *VNI = LRQ.valueOutOrDead() ? LRQ.valueOutOrDead() : LRQ.valueIn();
   if (!VNI)
     return;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 787a81a..358e060 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -658,13 +658,13 @@ namespace {
                           bool InexpensiveOnly = false,
                           std::optional<EVT> OutVT = std::nullopt);
     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
-    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
-    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
-    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
+    SDValue buildRsqrtEstimate(SDValue Op);
+    SDValue buildSqrtEstimate(SDValue Op);
+    SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
-                                SDNodeFlags Flags, bool Reciprocal);
+                                bool Reciprocal);
     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
-                                SDNodeFlags Flags, bool Reciprocal);
+                                bool Reciprocal);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -18590,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
     // If this FDIV is part of a reciprocal square root, it may be folded
     // into a target-specific square root estimate instruction.
     if (N1.getOpcode() == ISD::FSQRT) {
-      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
+      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-      if (SDValue RV =
-              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
         AddToWorklist(RV.getNode());
         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
       }
     } else if (N1.getOpcode() == ISD::FP_ROUND &&
                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-      if (SDValue RV =
-              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
+      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
         AddToWorklist(RV.getNode());
         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -18635,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
             SDValue AAZ =
                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
-            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
 
             // Estimate creation failed. Clean up speculatively created nodes.
@@ -18645,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
 
         // We found a FSQRT, so try to make this fold:
         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
-        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
           AddToWorklist(Div.getNode());
           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
@@ -18742,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
     return SDValue();
 
   // FSQRT nodes have flags that propagate to the created nodes.
+  SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
   //       transform the fdiv, we may produce a sub-optimal estimate sequence
   //       because the reciprocal calculation may not have to filter out a
   //       0.0 input.
-  return buildSqrtEstimate(N0, Flags);
+  return buildSqrtEstimate(N0);
 }
 
 /// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -29743,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
 /// As a result, we precompute A/2 prior to the iteration loop.
 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
-                                         unsigned Iterations,
-                                         SDNodeFlags Flags, bool Reciprocal) {
+                                         unsigned Iterations, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
 
   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
   // this entire sequence requires only one FP constant.
-  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
-  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
 
   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
   for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
-    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
-    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
   }
 
   // If non-reciprocal square root is requested, multiply the result by Arg.
   if (!Reciprocal)
-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
 
   return Est;
 }
@@ -29775,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
 ///     =>
 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
-                                         unsigned Iterations,
-                                         SDNodeFlags Flags, bool Reciprocal) {
+                                         unsigned Iterations, bool Reciprocal) {
   EVT VT = Arg.getValueType();
   SDLoc DL(Arg);
   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -29789,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
   // Newton iterations for reciprocal square root:
   // E = (E * -0.5) * ((A * E) * E + -3.0)
   for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
-    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
-    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
+    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
+    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
 
     // When calculating a square root at the last iteration build:
     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -29799,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
     SDValue LHS;
     if (Reciprocal || (i + 1) < Iterations) {
       // RSQRT: LHS = (E * -0.5)
-      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
     } else {
       // SQRT: LHS = (A * E) * -0.5
-      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
     }
 
-    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
   }
 
   return Est;
@@ -29814,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
 /// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
-                                           bool Reciprocal) {
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
   if (LegalDAG)
     return SDValue();
 
@@ -29843,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
 
     if (Iterations > 0)
       Est = UseOneConstNR
-            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
-            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+                ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
+                : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
     if (!Reciprocal) {
       SDLoc DL(Op);
       // Try the target specific test first.
@@ -29862,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
   return SDValue();
 }
 
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
-  return buildSqrtEstimateImpl(Op, Flags, true);
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
+  return buildSqrtEstimateImpl(Op, true);
 }
 
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
-  return buildSqrtEstimateImpl(Op, Flags, false);
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
+  return buildSqrtEstimateImpl(Op, false);
 }
 
 /// Return true if there is any possibility that the two addresses overlap.
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7a0256f..fa39603 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -338,9 +338,13 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
     if (FilePath.empty()) {
       // If we had a DW_AT_decl_file, but got no file then we need to emit a
       // warning.
+      const uint64_t DwarfFileIdx = dwarf::toUnsigned(
+          Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
+      // Check if there is no DW_AT_decl_line attribute, and don't report an
+      // error if it isn't there.
+      if (DwarfFileIdx == UINT32_MAX)
+        return;
       Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
-        const uint64_t DwarfFileIdx = dwarf::toUnsigned(
-            Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
         OS << "error: function DIE at " << HEX32(Die.getOffset())
            << " has an invalid file index " << DwarfFileIdx
            << " in its DW_AT_decl_file attribute, unable to create a single "
@@ -629,6 +633,10 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
   size_t NumBefore = Gsym.getNumFunctionInfos();
   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
     DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
+    // Apple uses DW_AT_GNU_dwo_id for things other than split DWARF.
+    if (IsMachO)
+      return ReturnDie;
+
     if (DwarfUnit.getDWOId()) {
       DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
       if (!DWOCU->isDWOUnit())
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 5b3c05e..6c7e27e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -260,22 +260,17 @@ public:
     }
 
     // Run finalization actions.
-    using WrapperFunctionCall = orc::shared::WrapperFunctionCall;
-    runFinalizeActions(
-        G->allocActions(),
-        [this, OnFinalized = std::move(OnFinalized)](
-            Expected<std::vector<WrapperFunctionCall>> DeallocActions) mutable {
-          completeFinalization(std::move(OnFinalized),
-                               std::move(DeallocActions));
-        });
-  }
+    auto DeallocActions = runFinalizeActions(G->allocActions());
+    if (!DeallocActions) {
+      OnFinalized(DeallocActions.takeError());
+      return;
+    }
 
-  void abandon(OnAbandonedFunction OnAbandoned) override {
-    Error Err = Error::success();
-    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
-      Err = joinErrors(std::move(Err), errorCodeToError(EC));
-    if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
-      Err = joinErrors(std::move(Err), errorCodeToError(EC));
+    // Release the finalize segments slab.
+    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
+      OnFinalized(errorCodeToError(EC));
+      return;
+    }
 
 #ifndef NDEBUG
     // Set 'G' to null to flag that we've been successfully finalized.
@@ -284,22 +279,17 @@ public:
     G = nullptr;
 #endif
 
-    OnAbandoned(std::move(Err));
+    // Continue with finalized allocation.
+    OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
+                                            std::move(*DeallocActions)));
   }
 
-private:
-  void completeFinalization(
-      OnFinalizedFunction OnFinalized,
-      Expected<std::vector<orc::shared::WrapperFunctionCall>> DeallocActions) {
-
-    if (!DeallocActions)
-      return OnFinalized(DeallocActions.takeError());
-
-    // Release the finalize segments slab.
-    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
-      OnFinalized(errorCodeToError(EC));
-      return;
-    }
+  void abandon(OnAbandonedFunction OnAbandoned) override {
+    Error Err = Error::success();
+    if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
+      Err = joinErrors(std::move(Err), errorCodeToError(EC));
+    if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+      Err = joinErrors(std::move(Err), errorCodeToError(EC));
 
 #ifndef NDEBUG
     // Set 'G' to null to flag that we've been successfully finalized.
@@ -308,11 +298,10 @@ private:
     G = nullptr;
 #endif
 
-    // Continue with finalized allocation.
-    OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
-                                            std::move(*DeallocActions)));
+    OnAbandoned(std::move(Err));
   }
 
+private:
   Error applyProtections() {
     for (auto &KV : BL.segments()) {
       const auto &AG = KV.first;
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 7b327af..7e606c6a 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -91,19 +91,9 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
       sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size);
   }
 
-  std::vector<shared::WrapperFunctionCall> DeinitializeActions;
-  {
-    std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
-    auto F = P.get_future();
-    shared::runFinalizeActions(
-        AI.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
-          P.set_value(std::move(R));
-        });
-    if (auto DeinitializeActionsOrErr = F.get())
-      DeinitializeActions = std::move(*DeinitializeActionsOrErr);
-    else
-      return OnInitialized(DeinitializeActionsOrErr.takeError());
-  }
+  auto DeinitializeActions = shared::runFinalizeActions(AI.Actions);
+  if (!DeinitializeActions)
+    return OnInitialized(DeinitializeActions.takeError());
 
   {
     std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,7 +101,7 @@ void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
     // This is the maximum range whose permission have been possibly modified
     auto &Alloc = Allocations[MinAddr];
     Alloc.Size = MaxAddr - MinAddr;
-    Alloc.DeinitializationActions = std::move(DeinitializeActions);
+    Alloc.DeinitializationActions = std::move(*DeinitializeActions);
     Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr);
   }
 
@@ -128,10 +118,10 @@ void InProcessMemoryMapper::deinitialize(
 
     for (auto Base : llvm::reverse(Bases)) {
 
-      shared::runDeallocActions(
-          Allocations[Base].DeinitializationActions, [&](Error Err) {
-            AllErr = joinErrors(std::move(AllErr), std::move(Err));
-          });
+      if (Error Err = shared::runDeallocActions(
+              Allocations[Base].DeinitializationActions)) {
+        AllErr = joinErrors(std::move(AllErr), std::move(Err));
+      }
 
       // Reset protections to read/write so the area can be reused
       if (auto EC = sys::Memory::protectMappedMemory(
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
index 08ab0c6..91f2899 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp
@@ -12,39 +12,31 @@ namespace llvm {
 namespace orc {
 namespace shared {
 
-void runFinalizeActions(AllocActions &AAs,
-                        OnRunFinalizeActionsCompleteFn OnComplete) {
+Expected<std::vector<WrapperFunctionCall>>
+runFinalizeActions(AllocActions &AAs) {
   std::vector<WrapperFunctionCall> DeallocActions;
   DeallocActions.reserve(numDeallocActions(AAs));
 
   for (auto &AA : AAs) {
     if (AA.Finalize)
-
-      if (auto Err = AA.Finalize.runWithSPSRetErrorMerged()) {
-        while (!DeallocActions.empty()) {
-          Err = joinErrors(std::move(Err),
-                           DeallocActions.back().runWithSPSRetErrorMerged());
-          DeallocActions.pop_back();
-        }
-        return OnComplete(std::move(Err));
-      }
+      if (auto Err = AA.Finalize.runWithSPSRetErrorMerged())
+        return joinErrors(std::move(Err), runDeallocActions(DeallocActions));
 
     if (AA.Dealloc)
       DeallocActions.push_back(std::move(AA.Dealloc));
   }
 
   AAs.clear();
-  OnComplete(std::move(DeallocActions));
+  return DeallocActions;
 }
 
-void runDeallocActions(ArrayRef<WrapperFunctionCall> DAs,
-                       OnRunDeallocActionsComeleteFn OnComplete) {
+Error runDeallocActions(ArrayRef<WrapperFunctionCall> DAs) {
   Error Err = Error::success();
   while (!DAs.empty()) {
     Err = joinErrors(std::move(Err), DAs.back().runWithSPSRetErrorMerged());
     DAs = DAs.drop_back();
   }
-  OnComplete(std::move(Err));
+  return Err;
 }
 
 } // namespace shared
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
index 8c24b1f..4fbf232 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -9,10 +9,8 @@
 #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h"
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/WindowsError.h"
-#include <future>
 #include <sstream>
 
 #if defined(LLVM_ON_UNIX)
@@ -183,24 +181,15 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
   }
 
   // Run finalization actions and get deinitlization action list.
-  std::vector<shared::WrapperFunctionCall> DeinitializeActions;
-  {
-    std::promise<MSVCPExpected<std::vector<shared::WrapperFunctionCall>>> P;
-    auto F = P.get_future();
-    shared::runFinalizeActions(
-        FR.Actions, [&](Expected<std::vector<shared::WrapperFunctionCall>> R) {
-          P.set_value(std::move(R));
-        });
-    if (auto DeinitializeActionsOrErr = F.get())
-      DeinitializeActions = std::move(*DeinitializeActionsOrErr);
-    else
-      return DeinitializeActionsOrErr.takeError();
+  auto DeinitializeActions = shared::runFinalizeActions(FR.Actions);
+  if (!DeinitializeActions) {
+    return DeinitializeActions.takeError();
   }
 
   {
     std::lock_guard<std::mutex> Lock(Mutex);
     Allocations[MinAddr].DeinitializationActions =
-        std::move(DeinitializeActions);
+        std::move(*DeinitializeActions);
     Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr);
   }
 
@@ -221,11 +210,10 @@ Error ExecutorSharedMemoryMapperService::deinitialize(
     std::lock_guard<std::mutex> Lock(Mutex);
 
     for (auto Base : llvm::reverse(Bases)) {
-      shared::runDeallocActions(
-          Allocations[Base].DeinitializationActions, [&](Error Err) {
-            if (Err)
-              AllErr = joinErrors(std::move(AllErr), std::move(Err));
-          });
+      if (Error Err = shared::runDeallocActions(
+              Allocations[Base].DeinitializationActions)) {
+        AllErr = joinErrors(std::move(AllErr), std::move(Err));
+      }
 
       // Remove the allocation from the allocation list of its reservation
       for (auto &Reservation : Reservations) {
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index ca81d30..8ace2d2 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cstdint>
 #include <optional>
 
 using namespace llvm;
@@ -193,7 +194,12 @@ void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) {
         dxbc::PSV::v2::ResourceBindInfo BindInfo;
         BindInfo.Type = Type;
         BindInfo.LowerBound = Binding.LowerBound;
-        BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1;
+        assert(Binding.Size == UINT32_MAX ||
+               (uint64_t)Binding.LowerBound + Binding.Size - 1 <= UINT32_MAX &&
+                   "Resource range is too large");
+        BindInfo.UpperBound = (Binding.Size == UINT32_MAX)
+                                  ? UINT32_MAX
+                                  : Binding.LowerBound + Binding.Size - 1;
         BindInfo.Space = Binding.Space;
         BindInfo.Kind = static_cast<dxbc::PSV::ResourceKind>(Kind);
         BindInfo.Flags = Flags;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 47c24fc..f973949 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -601,6 +601,29 @@ static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
   MachineOperand Src = MI.getOperand(3);
   MachineOperand Len = MI.getOperand(4);
 
+  // If the length is a constant, we don't actually need the check.
+  if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+    if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+        Def->getOpcode() == WebAssembly::CONST_I64) {
+      if (Def->getOperand(1).getImm() == 0) {
+        // A zero-length memcpy is a no-op.
+        MI.eraseFromParent();
+        return BB;
+      }
+      // A non-zero-length memcpy doesn't need a zero check.
+      unsigned MemoryCopy =
+          Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
+      BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
+          .add(DstMem)
+          .add(SrcMem)
+          .add(Dst)
+          .add(Src)
+          .add(Len);
+      MI.eraseFromParent();
+      return BB;
+    }
+  }
+
   // We're going to add an extra use to `Len` to test if it's zero; that
   // use shouldn't be a kill, even if the original use is.
   MachineOperand NoKillLen = Len;
@@ -669,6 +692,28 @@ static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
   MachineOperand Val = MI.getOperand(2);
   MachineOperand Len = MI.getOperand(3);
 
+  // If the length is a constant, we don't actually need the check.
+  if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
+    if (Def->getOpcode() == WebAssembly::CONST_I32 ||
+        Def->getOpcode() == WebAssembly::CONST_I64) {
+      if (Def->getOperand(1).getImm() == 0) {
+        // A zero-length memset is a no-op.
+        MI.eraseFromParent();
+        return BB;
+      }
+      // A non-zero-length memset doesn't need a zero check.
+      unsigned MemoryFill =
+          Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
+      BuildMI(*BB, MI, DL, TII.get(MemoryFill))
+          .add(Mem)
+          .add(Dst)
+          .add(Val)
+          .add(Len);
+      MI.eraseFromParent();
+      return BB;
+    }
+  }
+
   // We're going to add an extra use to `Len` to test if it's zero; that
   // use shouldn't be a kill, even if the original use is.
   MachineOperand NoKillLen = Len;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index d2f09e9..578fec7 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2667,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
   for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
     Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
 
-  V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
+  // No profiling support for vector selects.
+  V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
+                                         DEBUG_TYPE, Name + "blend");
 
   LLVM_DEBUG(dbgs() << "    blend: " << *V << "\n");
   return V;
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll
new file mode 100644
index 0000000..ebab9f0
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s --data-layout="e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X64 %s
+; RUN: opt < %s --data-layout="e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X32 %s
+
+declare void @useptr(ptr)
+
+define void @ptrtoaddr(ptr %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) {
+; X64-LABEL: 'ptrtoaddr'
+; X64-NEXT:  Classifying expressions for: @ptrtoaddr
+; X64-NEXT:    %p0 = ptrtoaddr ptr %in to i64
+; X64-NEXT:    --> %p0 U: full-set S: full-set
+; X64-NEXT:    %p1 = ptrtoaddr ptr %in to i32
+; X64-NEXT:    --> %p1 U: full-set S: full-set
+; X64-NEXT:    %p2 = ptrtoaddr ptr %in to i16
+; X64-NEXT:    --> %p2 U: full-set S: full-set
+; X64-NEXT:    %p3 = ptrtoaddr ptr %in to i128
+; X64-NEXT:    --> %p3 U: full-set S: full-set
+; X64-NEXT:  Determining loop execution counts for: @ptrtoaddr
+;
+; X32-LABEL: 'ptrtoaddr'
+; X32-NEXT:  Classifying expressions for: @ptrtoaddr
+; X32-NEXT:    %p0 = ptrtoaddr ptr %in to i64
+; X32-NEXT:    --> %p0 U: full-set S: full-set
+; X32-NEXT:    %p1 = ptrtoaddr ptr %in to i32
+; X32-NEXT:    --> %p1 U: full-set S: full-set
+; X32-NEXT:    %p2 = ptrtoaddr ptr %in to i16
+; X32-NEXT:    --> %p2 U: full-set S: full-set
+; X32-NEXT:    %p3 = ptrtoaddr ptr %in to i128
+; X32-NEXT:    --> %p3 U: full-set S: full-set
+; X32-NEXT:  Determining loop execution counts for: @ptrtoaddr
+;
+  %p0 = ptrtoaddr ptr %in to i64
+  %p1 = ptrtoaddr ptr %in to i32
+  %p2 = ptrtoaddr ptr %in to i16
+  %p3 = ptrtoaddr ptr %in to i128
+  store i64  %p0, ptr  %out0
+  store i32  %p1, ptr  %out1
+  store i16  %p2, ptr  %out2
+  store i128 %p3, ptr %out3
+  ret void
+}
+
+define void @ptrtoaddr_as1(ptr addrspace(1) %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) {
+; X64-LABEL: 'ptrtoaddr_as1'
+; X64-NEXT:  Classifying expressions for: @ptrtoaddr_as1
+; X64-NEXT:    %p0 = ptrtoaddr ptr addrspace(1) %in to i64
+; X64-NEXT:    --> %p0 U: full-set S: full-set
+; X64-NEXT:    %p1 = ptrtoaddr ptr addrspace(1) %in to i32
+; X64-NEXT:    --> %p1 U: full-set S: full-set
+; X64-NEXT:    %p2 = ptrtoaddr ptr addrspace(1) %in to i16
+; X64-NEXT:    --> %p2 U: full-set S: full-set
+; X64-NEXT:    %p3 = ptrtoaddr ptr addrspace(1) %in to i128
+; X64-NEXT:    --> %p3 U: full-set S: full-set
+; X64-NEXT:  Determining loop execution counts for: @ptrtoaddr_as1
+;
+; X32-LABEL: 'ptrtoaddr_as1'
+; X32-NEXT:  Classifying expressions for: @ptrtoaddr_as1
+; X32-NEXT:    %p0 = ptrtoaddr ptr addrspace(1) %in to i64
+; X32-NEXT:    --> %p0 U: full-set S: full-set
+; X32-NEXT:    %p1 = ptrtoaddr ptr addrspace(1) %in to i32
+; X32-NEXT:    --> %p1 U: full-set S: full-set
+; X32-NEXT:    %p2 = ptrtoaddr ptr addrspace(1) %in to i16
+; X32-NEXT:    --> %p2 U: full-set S: full-set
+; X32-NEXT:    %p3 = ptrtoaddr ptr addrspace(1) %in to i128
+; X32-NEXT:    --> %p3 U: full-set S: full-set
+; X32-NEXT:  Determining loop execution counts for: @ptrtoaddr_as1
+;
+  %p0 = ptrtoaddr ptr addrspace(1) %in to i64
+  %p1 = ptrtoaddr ptr addrspace(1) %in to i32
+  %p2 = ptrtoaddr ptr addrspace(1) %in to i16
+  %p3 = ptrtoaddr ptr addrspace(1) %in to i128
+  store i64  %p0, ptr  %out0
+  store i32  %p1, ptr  %out1
+  store i16  %p2, ptr  %out2
+  store i128 %p3, ptr %out3
+  ret void
+}
+
+define void @ptrtoaddr_of_bitcast(ptr %in, ptr %out0) {
+; X64-LABEL: 'ptrtoaddr_of_bitcast'
+; X64-NEXT:  Classifying expressions for: @ptrtoaddr_of_bitcast
+; X64-NEXT:    %in_casted = bitcast ptr %in to ptr
+; X64-NEXT:    --> %in U: full-set S: full-set
+; X64-NEXT:    %p0 = ptrtoaddr ptr %in_casted to i64
+; X64-NEXT:    --> %p0 U: full-set S: full-set
+; X64-NEXT:  Determining loop execution counts for: @ptrtoaddr_of_bitcast
+;
+; X32-LABEL: 'ptrtoaddr_of_bitcast'
+; X32-NEXT:  Classifying expressions for: @ptrtoaddr_of_bitcast
+; X32-NEXT:    %in_casted = bitcast ptr %in to ptr
+; X32-NEXT:    --> %in U: full-set S: full-set
+; X32-NEXT:    %p0 = ptrtoaddr ptr %in_casted to i64
+; X32-NEXT:    --> %p0 U: full-set S: full-set
+; X32-NEXT:  Determining loop execution counts for: @ptrtoaddr_of_bitcast
+;
+  %in_casted = bitcast ptr %in to ptr
+  %p0 = ptrtoaddr ptr %in_casted to i64
+  store i64 %p0, ptr %out0
+  ret void
+}
+
+define void @ptrtoaddr_of_nullptr(ptr %out0) {
+; ALL-LABEL: 'ptrtoaddr_of_nullptr'
+; ALL-NEXT:  Classifying expressions for: @ptrtoaddr_of_nullptr
+; ALL-NEXT:    %p0 = ptrtoaddr ptr null to i64
+; ALL-NEXT:    --> %p0 U: full-set S: full-set
+; ALL-NEXT:  Determining loop execution counts for: @ptrtoaddr_of_nullptr
+;
+  %p0 = ptrtoaddr ptr null to i64
+  store i64 %p0, ptr %out0
+  ret void
+}
+
+define void @ptrtoaddr_of_gep(ptr %in, ptr %out0) {
+; X64-LABEL: 'ptrtoaddr_of_gep'
+; X64-NEXT:  Classifying expressions for: @ptrtoaddr_of_gep
+; X64-NEXT:    %in_adj = getelementptr inbounds i8, ptr %in, i64 42
+; X64-NEXT:    --> (42 + %in) U: full-set S: full-set
+; X64-NEXT:    %p0 = ptrtoaddr ptr %in_adj to i64
+; X64-NEXT:    --> %p0 U: full-set S: full-set
+; X64-NEXT:  Determining loop execution counts for: @ptrtoaddr_of_gep
+;
+; X32-LABEL: 'ptrtoaddr_of_gep'
+; X32-NEXT:  Classifying expressions for: @ptrtoaddr_of_gep
+; X32-NEXT:    %in_adj = getelementptr inbounds i8, ptr %in, i64 42
+; X32-NEXT:    --> (42 + %in) U: full-set S: full-set
+; X32-NEXT:    %p0 = ptrtoaddr ptr %in_adj to i64
+; X32-NEXT:    --> %p0 U: full-set S: full-set
+; X32-NEXT:  Determining loop execution counts for: @ptrtoaddr_of_gep
+;
+  %in_adj = getelementptr inbounds i8, ptr %in, i64 42
+  %p0 = ptrtoaddr ptr %in_adj to i64
+  store i64  %p0, ptr  %out0
+  ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
index 7ba422d..a477465c 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
@@ -578,22 +578,22 @@ define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption
 ; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:      Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
 ; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:      Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
 ; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:      Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
 ;
 entry:
   call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ]
@@ -615,9 +615,9 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4611686018427387903
@@ -644,9 +644,9 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start)<nuw><nsw>,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4611686018427387903
@@ -677,22 +677,22 @@ define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr
 ; CHECK-NEXT:    %c = call i1 @cond()
 ; CHECK-NEXT:    --> %c U: full-set S: full-set
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors
 ; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:      Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
 ; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:      Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
 ; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:      Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0
 ;
 entry:
   call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ]
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
index bea0310..70224fc 100644
--- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
+++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll
@@ -94,6 +94,18 @@ define void @main() #0 {
   %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
               @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0(
                   i32 4, i32 0, i32 10, i32 5, ptr null)
+
+  ; RWBuffer<float4> UnboundedArray[] : register(u10, space5)
+; CHECK:        - Type:            UAVTyped
+; CHECK:          Space:           5
+; CHECK:          LowerBound:      10
+; CHECK:          UpperBound:      4294967295
+; CHECK:          Kind:            TypedBuffer
+; CHECK:          Flags:
+; CHECK:            UsedByAtomic64:  false
+  ; RWBuffer<float4> Buf = BufferArray[100];
+  %uav3 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+              @llvm.dx.resource.handlefrombinding(i32 5, i32 10, i32 -1, i32 100, ptr null)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
index bf31ccb..559cc53 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll
@@ -32,6 +32,40 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
   ret void
 }
 
+define void @select_v32i8_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v32i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %sel = select <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i8> %v0, <32 x i8> %v1
+  store <32 x i8> %sel, ptr %res
+  ret void
+}
+
+define void @select_v32i8_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v32i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %sel = select <32 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1
+  store <32 x i8> %sel, ptr %res
+  ret void
+}
+
 define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v16i16:
 ; CHECK:       # %bb.0:
@@ -49,6 +83,40 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
   ret void
 }
 
+define void @select_v16i16_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i16_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI5_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI5_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %sel = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %v0, <16 x i16> %v1
+  store <16 x i16> %sel, ptr %res
+  ret void
+}
+
+define void @select_v16i16_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i16_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %sel = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i16> %v0, <16 x i16> %v1
+  store <16 x i16> %sel, ptr %res
+  ret void
+}
+
 define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v8i32:
 ; CHECK:       # %bb.0:
@@ -65,19 +133,70 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
   ret void
 }
 
+define void @select_v8i32_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i32_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI8_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %sel = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i32> %v0, <8 x i32> %v1
+  store <8 x i32> %sel, ptr %res
+  ret void
+}
+
+define void @select_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %v0, <8 x float> %v1
+  store <8 x float> %sel, ptr %res
+  ret void
+}
+
 define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI10_0)
 ; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
   %v1 = load <4 x i64>, ptr %a1
-  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1
+  %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v0, <4 x i64> %v1
   store <4 x i64> %sel, ptr %res
   ret void
 }
+
+define void @select_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvld $xr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI11_0)
+; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI11_0)
+; CHECK-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x double>, ptr %a0
+  %v1 = load <4 x double>, ptr %a1
+  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v0, <4 x double> %v1
+  store <4 x double> %sel, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
index 8f25a6b..25c4f09 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -16,6 +16,20 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
   ret void
 }
 
+define void @select_v16i8_imm_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: select_v16i8_imm_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vrepli.h $vr1, -256
+; CHECK-NEXT:    vbitseli.b $vr1, $vr0, 1
+; CHECK-NEXT:    vst $vr1, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %a0
+  %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %v0
+  store <16 x i8> %sel, ptr %res
+  ret void
+}
+
 define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v16i8:
 ; CHECK:       # %bb.0:
@@ -32,6 +46,40 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
   ret void
 }
 
+define void @select_v16i8_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %a0
+  %v1 = load <16 x i8>, ptr %a1
+  %sel = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %v0, <16 x i8> %v1
+  store <16 x i8> %sel, ptr %res
+  ret void
+}
+
+define void @select_v16i8_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <16 x i8>, ptr %a0
+  %v1 = load <16 x i8>, ptr %a1
+  %sel = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> %v0, <16 x i8> %v1
+  store <16 x i8> %sel, ptr %res
+  ret void
+}
+
 define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       # %bb.0:
@@ -49,6 +97,40 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
   ret void
 }
 
+define void @select_v8i16_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI6_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI6_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %a0
+  %v1 = load <8 x i16>, ptr %a1
+  %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1
+  store <8 x i16> %sel, ptr %res
+  ret void
+}
+
+define void @select_v8i16_2(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI7_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI7_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <8 x i16>, ptr %a0
+  %v1 = load <8 x i16>, ptr %a1
+  %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1
+  store <8 x i16> %sel, ptr %res
+  ret void
+}
+
 define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v4i32:
 ; CHECK:       # %bb.0:
@@ -65,13 +147,47 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
   ret void
 }
 
+define void @select_v4i32_1(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4i32_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI9_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI9_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x i32>, ptr %a0
+  %v1 = load <4 x i32>, ptr %a1
+  %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v0, <4 x i32> %v1
+  store <4 x i32> %sel, ptr %res
+  ret void
+}
+
+define void @select_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI10_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI10_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <4 x float>, ptr %a0
+  %v1 = load <4 x float>, ptr %a1
+  %sel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %v0, <4 x float> %v1
+  store <4 x float> %sel, ptr %res
+  ret void
+}
+
 define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI11_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI11_0)
 ; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
@@ -81,3 +197,20 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
   store <2 x i64> %sel, ptr %res
   ret void
 }
+
+define void @select_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a2, 0
+; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI12_0)
+; CHECK-NEXT:    vld $vr2, $a1, %pc_lo12(.LCPI12_0)
+; CHECK-NEXT:    vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+  %v0 = load <2 x double>, ptr %a0
+  %v1 = load <2 x double>, ptr %a1
+  %sel = select <2 x i1> <i1 false, i1 true>, <2 x double> %v0, <2 x double> %v1
+  store <2 x double> %sel, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index e71f59c..cad684e 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_afn_ieee:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
-; GLOBAL-NEXT:    xsabsdp 0, 1
-; GLOBAL-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
-; GLOBAL-NEXT:    fcmpu 0, 0, 2
-; GLOBAL-NEXT:    xxlxor 0, 0, 0
-; GLOBAL-NEXT:    blt 0, .LBB11_2
-; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT:    xvcvsxwdp 2, 34
-; GLOBAL-NEXT:    xsmulsp 1, 1, 0
-; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xvcvsxwdp 3, 34
+; GLOBAL-NEXT:    xsmulsp 2, 1, 0
+; GLOBAL-NEXT:    xsabsdp 1, 1
+; GLOBAL-NEXT:    xsmaddasp 3, 2, 0
 ; GLOBAL-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT:    xsmulsp 0, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB11_2:
-; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT:    xsmulsp 0, 2, 0
+; GLOBAL-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
+; GLOBAL-NEXT:    xssubsp 1, 1, 2
+; GLOBAL-NEXT:    xxlxor 2, 2, 2
+; GLOBAL-NEXT:    xsmulsp 0, 0, 3
+; GLOBAL-NEXT:    fsel 1, 1, 0, 2
 ; GLOBAL-NEXT:    blr
   %rt = call afn ninf float @llvm.sqrt.f32(float %x)
   ret float %rt
@@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
 ;
 ; GLOBAL-LABEL: sqrt_afn_preserve_sign:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    xxlxor 0, 0, 0
-; GLOBAL-NEXT:    fcmpu 0, 1, 0
-; GLOBAL-NEXT:    beq 0, .LBB13_2
-; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; GLOBAL-NEXT:    xvcvsxwdp 2, 34
-; GLOBAL-NEXT:    xsmulsp 1, 1, 0
-; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xvcvsxwdp 3, 34
+; GLOBAL-NEXT:    xsmulsp 2, 1, 0
+; GLOBAL-NEXT:    xsmaddasp 3, 2, 0
 ; GLOBAL-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
-; GLOBAL-NEXT:    xsmulsp 0, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB13_2:
-; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    xsmulsp 0, 2, 0
+; GLOBAL-NEXT:    xxlxor 2, 2, 2
+; GLOBAL-NEXT:    xsmulsp 0, 0, 3
+; GLOBAL-NEXT:    fsel 2, 1, 2, 0
+; GLOBAL-NEXT:    xsnegdp 1, 1
+; GLOBAL-NEXT:    fsel 1, 1, 2, 0
 ; GLOBAL-NEXT:    blr
   %rt = call afn ninf float @llvm.sqrt.f32(float %x)
   ret float %rt
@@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 {
 ;
 ; GLOBAL-LABEL: sqrt_fast_ieee:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
-; GLOBAL-NEXT:    xsabsdp 0, 1
-; GLOBAL-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
-; GLOBAL-NEXT:    fcmpu 0, 0, 2
-; GLOBAL-NEXT:    xxlxor 0, 0, 0
-; GLOBAL-NEXT:    blt 0, .LBB15_2
-; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; GLOBAL-NEXT:    xvcvsxwdp 2, 34
-; GLOBAL-NEXT:    xsmulsp 1, 1, 0
-; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xvcvsxwdp 3, 34
+; GLOBAL-NEXT:    xsmulsp 2, 1, 0
+; GLOBAL-NEXT:    xsabsdp 1, 1
+; GLOBAL-NEXT:    xsmaddasp 3, 2, 0
 ; GLOBAL-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
-; GLOBAL-NEXT:    xsmulsp 0, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB15_2:
-; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
+; GLOBAL-NEXT:    xsmulsp 0, 2, 0
+; GLOBAL-NEXT:    lfs 2, .LCPI15_1@toc@l(3)
+; GLOBAL-NEXT:    xssubsp 1, 1, 2
+; GLOBAL-NEXT:    xxlxor 2, 2, 2
+; GLOBAL-NEXT:    xsmulsp 0, 0, 3
+; GLOBAL-NEXT:    fsel 1, 1, 0, 2
 ; GLOBAL-NEXT:    blr
   %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
   ret float %rt
@@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
 ;
 ; GLOBAL-LABEL: sqrt_fast_preserve_sign:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    xxlxor 0, 0, 0
-; GLOBAL-NEXT:    fcmpu 0, 1, 0
-; GLOBAL-NEXT:    beq 0, .LBB16_2
-; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    vspltisw 2, -3
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
-; GLOBAL-NEXT:    xvcvsxwdp 2, 34
-; GLOBAL-NEXT:    xsmulsp 1, 1, 0
-; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xvcvsxwdp 3, 34
+; GLOBAL-NEXT:    xsmulsp 2, 1, 0
+; GLOBAL-NEXT:    xsmaddasp 3, 2, 0
 ; GLOBAL-NEXT:    lfs 0, .LCPI16_0@toc@l(3)
-; GLOBAL-NEXT:    xsmulsp 0, 1, 0
-; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB16_2:
-; GLOBAL-NEXT:    fmr 1, 0
+; GLOBAL-NEXT:    xsmulsp 0, 2, 0
+; GLOBAL-NEXT:    xxlxor 2, 2, 2
+; GLOBAL-NEXT:    xsmulsp 0, 0, 3
+; GLOBAL-NEXT:    fsel 2, 1, 2, 0
+; GLOBAL-NEXT:    xsnegdp 1, 1
+; GLOBAL-NEXT:    fsel 1, 1, 2, 0
 ; GLOBAL-NEXT:    blr
   %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
   ret float %rt
diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll
new file mode 100644
index 0000000..c19e93d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O1 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+
+define i32 @pr134424(i64 %input_value, i32 %base_value, i1 %cond_flag1, i1 %cond_flag2, i1 %cond_flag3) {
+; CHECK-LABEL: pr134424:
+; CHECK:       # %bb.0: # %for.body.us.preheader.i
+; CHECK-NEXT:    andi a3, a3, 1
+; CHECK-NEXT:    andi a5, a2, 1
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 14
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    bnez a5, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %for.body.us.preheader.i
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:  .LBB0_2: # %for.body.us.preheader.i
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    andi a4, a4, 1
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    bnez a3, .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %for.body.us.preheader.i
+; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:  .LBB0_4: # %for.body.us.preheader.i
+; CHECK-NEXT:    vmsle.vi v0, v8, 0
+; CHECK-NEXT:    sext.w a2, a2
+; CHECK-NEXT:    bnez a4, .LBB0_6
+; CHECK-NEXT:  # %bb.5: # %for.body.us.preheader.i
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:  .LBB0_6: # %for.body.us.preheader.i
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vredmin.vs v8, v8, v8
+; CHECK-NEXT:    vmv.x.s a3, v8
+; CHECK-NEXT:    sext.w a1, a1
+; CHECK-NEXT:    bge a3, a2, .LBB0_11
+; CHECK-NEXT:  # %bb.7: # %for.body.us.preheader.i
+; CHECK-NEXT:    bge a0, a1, .LBB0_12
+; CHECK-NEXT:  .LBB0_8: # %for.body.us.preheader.i
+; CHECK-NEXT:    blt a3, a0, .LBB0_10
+; CHECK-NEXT:  .LBB0_9: # %for.body.us.preheader.i
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:  .LBB0_10: # %for.body.us.preheader.i
+; CHECK-NEXT:    sw a3, 0(zero)
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_11: # %for.body.us.preheader.i
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:    blt a0, a1, .LBB0_8
+; CHECK-NEXT:  .LBB0_12: # %for.body.us.preheader.i
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    bge a3, a0, .LBB0_9
+; CHECK-NEXT:    j .LBB0_10
+for.body.us.preheader.i:
+  %partial_vector = insertelement <4 x i64> zeroinitializer, i64 %input_value, i64 1
+  %comparison_vector = shufflevector <4 x i64> %partial_vector, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+  %comparison_result = icmp sle <4 x i64> %comparison_vector, zeroinitializer
+  %selected_value1 = select i1 %cond_flag1, i32 %base_value, i32 1
+  %selected_value2 = select i1 %cond_flag2, i32 %base_value, i32 1
+  %selected_value3 = select i1 %cond_flag3, i32 %base_value, i32 1
+  %bool_to_int = zext <4 x i1> %comparison_result to <4 x i32>
+  %extended_vector = shufflevector <4 x i32> %bool_to_int, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+  %vector_min = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %extended_vector)
+  %min1 = call i32 @llvm.smin.i32(i32 %vector_min, i32 %selected_value1)
+  %min2 = call i32 @llvm.smin.i32(i32 %selected_value2, i32 %selected_value3)
+  %final_min = call i32 @llvm.smin.i32(i32 %min1, i32 %min2)
+  store i32 %final_min, ptr null, align 4
+  ret i32 0
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir
new file mode 100644
index 0000000..aeab8f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=register-coalescer -o - %s | FileCheck %s
+
+---
+name:           pr71023
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: pr71023
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $x10, $v8, $v10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+  ; CHECK-NEXT:   undef [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_2:vrn8m1 = PseudoVMV_V_I_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_6:vrn8m1 = COPY undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2
+  ; CHECK-NEXT:   BNE undef [[DEF]], $x0, %bb.3
+  ; CHECK-NEXT:   PseudoBR %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BNE undef [[DEF]], $x0, %bb.3
+  ; CHECK-NEXT:   PseudoBR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   dead [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF
+  ; CHECK-NEXT:   early-clobber [[PseudoVMV_V_I_M1_]].sub_vrm1_0:vrn8m1 = PseudoVRGATHER_VI_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_0, [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   PseudoVSSEG6E8_V_M1_MASK [[PseudoVMV_V_I_M1_]].sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5, undef [[DEF]], killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1)
+  ; CHECK-NEXT:   PseudoRET
+  bb.0:
+    successors: %bb.3(0x40000000), %bb.1(0x40000000)
+    liveins: $x10, $v8, $v10
+    %0:gpr = IMPLICIT_DEF
+    %1:vrnov0 = PseudoVMV_V_I_M1 undef %1, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    %2:vrnov0 = IMPLICIT_DEF
+    undef %3.sub_vrm1_0:vrn6m1nov0 = COPY undef %1
+    %3.sub_vrm1_3:vrn6m1nov0 = COPY %2
+    %3.sub_vrm1_4:vrn6m1nov0 = COPY undef %1
+    BNE undef %0, $x0, %bb.3
+    PseudoBR %bb.1
+  bb.1:
+    successors: %bb.3(0x40000000), %bb.2(0x40000000)
+    BNE killed undef %0, $x0, %bb.3
+    PseudoBR %bb.2
+  bb.2:
+    successors: %bb.3(0x80000000)
+  bb.3:
+    %4:vr = IMPLICIT_DEF
+    early-clobber %4:vr = PseudoVRGATHER_VI_M1 undef %4, killed %1, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    undef %5.sub_vrm1_0:vrn6m1 = COPY killed %4
+    %5.sub_vrm1_5:vrn6m1 = COPY killed %2
+    PseudoVSSEG6E8_V_M1_MASK killed %5, undef %0, killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1)
+    PseudoRET
+...
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
index ae170d7..d949068 100644
--- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
@@ -104,6 +104,31 @@ define void @memset_i32(ptr %dest, i8 %val, i32 %len) {
   ret void
 }
 
+; CHECK-LABEL: memcpy_0:
+; CHECK-NEXT: .functype memcpy_0 (i32, i32) -> ()
+; CHECK-NEXT: return
+define void @memcpy_0(ptr %dest, ptr %src) {
+  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0)
+  ret void
+}
+
+; CHECK-LABEL: memmove_0:
+; CHECK-NEXT: .functype memmove_0 (i32, i32) -> ()
+; CHECK-NEXT: return
+define void @memmove_0(ptr %dest, ptr %src) {
+  call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0)
+  ret void
+}
+
+; CHECK-LABEL: memset_0:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_0 (i32, i32) -> ()
+; BULK-MEM-NEXT: return
+define void @memset_0(ptr %dest, i8 %val) {
+  call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 0, i1 0)
+  ret void
+}
+
 ; CHECK-LABEL: memcpy_1:
 ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
@@ -137,14 +162,8 @@ define void @memset_1(ptr %dest, i8 %val) {
 ; CHECK-LABEL: memcpy_1024:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
-; BULK-MEM-NEXT: block
 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
 ; BULK-MEM-NEXT: return
 define void @memcpy_1024(ptr %dest, ptr %src) {
   call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0)
@@ -154,14 +173,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) {
 ; CHECK-LABEL: memmove_1024:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
-; BULK-MEM-NEXT: block
 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
 ; BULK-MEM-NEXT: return
 define void @memmove_1024(ptr %dest, ptr %src) {
   call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0)
@@ -171,14 +184,8 @@ define void @memmove_1024(ptr %dest, ptr %src) {
 ; CHECK-LABEL: memset_1024:
 ; NO-BULK-MEM-NOT: memory.fill
 ; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
-; BULK-MEM-NEXT: block
 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L2]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
 ; BULK-MEM-NEXT: return
 define void @memset_1024(ptr %dest, i8 %val) {
   call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1024, i1 0)
@@ -201,17 +208,11 @@ define void @memset_1024(ptr %dest, i8 %val) {
 ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
-; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]]
 ; BULK-MEM-NEXT: return
 define void @memcpy_alloca_src(ptr %dst) {
   %a = alloca [100 x i8]
@@ -224,17 +225,11 @@ define void @memcpy_alloca_src(ptr %dst) {
 ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
-; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]]
 ; BULK-MEM-NEXT: return
 define void @memcpy_alloca_dst(ptr %src) {
   %a = alloca [100 x i8]
@@ -247,17 +242,11 @@ define void @memcpy_alloca_dst(ptr %src) {
 ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
 ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i32.sub $1=, $pop[[L0]], $pop[[L1]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i32.eqz $push[[L3:[0-9]+]]=, $pop[[L2]]
-; BULK-MEM-NEXT: br_if 0, $pop[[L3]]
-; BULK-MEM-NEXT: i32.const $push[[L4:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i32.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]]
-; BULK-MEM-NEXT: i32.const $push[[L6:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
+; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]]
 ; BULK-MEM-NEXT: return
 define void @memset_alloca(i8 %val) {
   %a = alloca [100 x i8]
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
index 0cf8493..d0206a3 100644
--- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll
@@ -110,6 +110,31 @@ define void @memset_i32(ptr %dest, i8 %val, i64 %len) {
   ret void
 }
 
+; CHECK-LABEL: memcpy_0:
+; CHECK-NEXT: .functype memcpy_0 (i64, i64) -> ()
+; CHECK-NEXT: return
+define void @memcpy_0(ptr %dest, ptr %src) {
+  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0)
+  ret void
+}
+
+; CHECK-LABEL: memmove_0:
+; CHECK-NEXT: .functype memmove_0 (i64, i64) -> ()
+; CHECK-NEXT: return
+define void @memmove_0(ptr %dest, ptr %src) {
+  call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0)
+  ret void
+}
+
+; CHECK-LABEL: memset_0:
+; NO-BULK-MEM-NOT: memory.fill
+; BULK-MEM-NEXT: .functype memset_0 (i64, i32) -> ()
+; BULK-MEM-NEXT: return
+define void @memset_0(ptr %dest, i8 %val) {
+  call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 0, i1 0)
+  ret void
+}
+
 ; CHECK-LABEL: memcpy_1:
 ; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
@@ -143,14 +168,8 @@ define void @memset_1(ptr %dest, i8 %val) {
 ; CHECK-LABEL: memcpy_1024:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const	$push[[L1:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i64.eqz 	$push0=, $pop[[L1]]
-; BULK-MEM-NEXT: br_if   	0, $pop0
 ; BULK-MEM-NEXT: i64.const	$push[[L0:[0-9]+]]=, 1024
 ; BULK-MEM-NEXT: memory.copy	0, 0, $0, $1, $pop[[L0]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
 ; BULK-MEM-NEXT: return
 define void @memcpy_1024(ptr %dest, ptr %src) {
   call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0)
@@ -160,14 +179,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) {
 ; CHECK-LABEL: memmove_1024:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const	$push[[L1:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i64.eqz 	$push0=, $pop[[L1]]
-; BULK-MEM-NEXT: br_if   	0, $pop0
 ; BULK-MEM-NEXT: i64.const	$push[[L0:[0-9]+]]=, 1024
 ; BULK-MEM-NEXT: memory.copy	0, 0, $0, $1, $pop[[L0]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
 ; BULK-MEM-NEXT: return
 define void @memmove_1024(ptr %dest, ptr %src) {
   call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0)
@@ -177,14 +190,8 @@ define void @memmove_1024(ptr %dest, ptr %src) {
 ; CHECK-LABEL: memset_1024:
 ; NO-BULK-MEM-NOT: memory.fill
 ; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const	$push[[L1:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: i64.eqz 	$push0=, $pop[[L1]]
-; BULK-MEM-NEXT: br_if   	0, $pop0
 ; BULK-MEM-NEXT: i64.const	$push[[L0:[0-9]+]]=, 1024
 ; BULK-MEM-NEXT: memory.fill	0, $0, $1, $pop[[L0]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
 ; BULK-MEM-NEXT: return
 define void @memset_1024(ptr %dest, i8 %val) {
   call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1024, i1 0)
@@ -207,17 +214,11 @@ define void @memset_1024(ptr %dest, i8 %val) {
 ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
 ; BULK-MEM-NEXT: global.get	$push[[L1:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i64.const	$push[[L0:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i64.sub 	$[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const	$push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i64.eqz 	$push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if   	0, $pop[[L4]]
-; BULK-MEM-NEXT: i64.const	$push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i64.add 	$push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i64.const	$push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy	0, 0, $0, $pop[[L6]], $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i64.sub 	$push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
+; BULK-MEM-NEXT: i64.const	$push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.add 	$push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i64.const	$push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy	0, 0, $0, $pop[[L4]], $pop[[L5]]
 ; BULK-MEM-NEXT: return
 define void @memcpy_alloca_src(ptr %dst) {
   %a = alloca [100 x i8]
@@ -230,17 +231,11 @@ define void @memcpy_alloca_src(ptr %dst) {
 ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
 ; BULK-MEM-NEXT: global.get	$push[[L1:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i64.const	$push[[L0:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i64.sub 	$[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const	$push[[L3:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i64.eqz 	$push[[L4:[0-9]+]]=, $pop[[L3]]
-; BULK-MEM-NEXT: br_if   	0, $pop[[L4]]
-; BULK-MEM-NEXT: i64.const	$push[[L5:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i64.add 	$push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
-; BULK-MEM-NEXT: i64.const	$push[[L7:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.copy	0, 0, $pop[[L6]], $0, $pop[[L7]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i64.sub 	$push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
+; BULK-MEM-NEXT: i64.const	$push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.add 	$push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i64.const	$push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.copy	0, 0, $pop[[L4]], $0, $pop[[L5]]
 ; BULK-MEM-NEXT: return
 define void @memcpy_alloca_dst(ptr %src) {
   %a = alloca [100 x i8]
@@ -253,17 +248,11 @@ define void @memcpy_alloca_dst(ptr %src) {
 ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
 ; BULK-MEM-NEXT: global.get	$push[[L1:[0-9]+]]=, __stack_pointer
 ; BULK-MEM-NEXT: i64.const	$push[[L0:[0-9]+]]=, 112
-; BULK-MEM-NEXT: i64.sub 	$1=, $pop[[L1]], $pop[[L0]]
-; BULK-MEM-NEXT: block
-; BULK-MEM-NEXT: i64.const	$push[[L2:[0-9]+]]=, 100
-; BULK-MEM-NEXT: i64.eqz 	$push[[L3:[0-9]+]]=, $pop[[L2]]
-; BULK-MEM-NEXT: br_if   	0, $pop[[L3]]
-; BULK-MEM-NEXT: i64.const	$push[[L4:[0-9]+]]=, 12
-; BULK-MEM-NEXT: i64.add 	$push[[L5:[0-9]+]]=, $1, $pop[[L4]]
-; BULK-MEM-NEXT: i64.const	$push[[L6:[0-9]+]]=, 100
-; BULK-MEM-NEXT: memory.fill	0, $pop[[L5]], $0, $pop[[L6]]
-; BULK-MEM-NEXT: .LBB{{.*}}:
-; BULK-MEM-NEXT: end_block
+; BULK-MEM-NEXT: i64.sub 	$push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
+; BULK-MEM-NEXT: i64.const	$push[[L3:[0-9]+]]=, 12
+; BULK-MEM-NEXT: i64.add 	$push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]
+; BULK-MEM-NEXT: i64.const	$push[[L5:[0-9]+]]=, 100
+; BULK-MEM-NEXT: memory.fill	0, $pop[[L4]], $0, $pop[[L5]]
 ; BULK-MEM-NEXT: return
 define void @memset_alloca(i8 %val) {
   %a = alloca [100 x i8]
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index cdf6bdd..caec02e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4765,8 +4765,8 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32
 }
 declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>)
 
-define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
-; X64-LABEL: pr163023:
+define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023_sext:
 ; X64:       # %bb.0:
 ; X64-NEXT:    kxnorw %k0, %k0, %k1
 ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
@@ -4774,7 +4774,7 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; X64-NEXT:    retq
 ;
-; X86-LABEL: pr163023:
+; X86-LABEL: pr163023_sext:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    kxnorw %k0, %k0, %k1
@@ -4788,7 +4788,40 @@ define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
   %ofs = sext <16 x i32> %a1 to <16 x i64>
   %addr = add nuw <16 x i64> %addr.splat, %ofs
   %ptr = inttoptr <16 x i64> %addr to <16 x ptr>
-  %gather = tail call fastcc <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+  %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+  ret <16 x i32> %gather
+}
+
+define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023_zext:
+; X64:       # %bb.0:
+; X64-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; X64-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-NEXT:    kxnorw %k0, %k0, %k1
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; X64-NEXT:    kxnorw %k0, %k0, %k2
+; X64-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm3 {%k2}
+; X64-NEXT:    vpgatherqd (%rdi,%zmm1), %ymm2 {%k1}
+; X64-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; X64-NEXT:    retq
+;
+; X86-LABEL: pr163023_zext:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kxnorw %k0, %k0, %k1
+; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vpgatherdd (%eax,%zmm0), %zmm1 {%k1}
+; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
+; X86-NEXT:    retl
+  %addr.p = ptrtoint ptr %a0 to i64
+  %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0
+  %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer
+  %ofs = zext <16 x i32> %a1 to <16 x i64>
+  %addr = add nuw <16 x i64> %addr.splat, %ofs
+  %ptr = inttoptr <16 x i64> %addr to <16 x ptr>
+  %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
   ret <16 x i32> %gather
 }
 
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 42617c1..18588aa 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+  ; CHECK-NEXT:   [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]]
   ; CHECK-NEXT:   [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
   ; CHECK-NEXT:   [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
   ; CHECK-NEXT:   [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr
@@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
+  ; CHECK-NEXT:   [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]]
   ; CHECK-NEXT:   [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
   ; CHECK-NEXT:   [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool)
   ; CHECK-NEXT:   [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr
diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td
index 3eda077..475faf9 100644
--- a/llvm/test/TableGen/directive1.td
+++ b/llvm/test/TableGen/directive1.td
@@ -177,6 +177,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
 // CHECK-NEXT:    static constexpr bool is_iterable = true;
 // CHECK-NEXT:  };
 // CHECK-NEXT:  } // namespace llvm
+// CHECK-EMPTY:
 // CHECK-NEXT:  #endif // LLVM_Tdl_INC
 
 
diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td
index a25197c..ccc0944 100644
--- a/llvm/test/TableGen/directive2.td
+++ b/llvm/test/TableGen/directive2.td
@@ -150,6 +150,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> {
 // CHECK-NEXT:    static constexpr bool is_iterable = true;
 // CHECK-NEXT:  };
 // CHECK-NEXT:  } // namespace llvm
+// CHECK-EMPTY:
 // CHECK-NEXT:  #endif // LLVM_Tdl_INC
 
 // IMPL:      #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
new file mode 100644
index 0000000..e4322cf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll
@@ -0,0 +1,588 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+define i32 @umax_phi_used_outside(ptr %src, i32 %n) {
+; CHECK-LABEL: define i32 @umax_phi_used_outside(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
+; CHECK-NEXT:    [[L_EXT:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEXT:    [[SPEC_SELECT]] = tail call i32 @llvm.umax.i32(i32 [[MAX]], i32 [[L_EXT]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MAX_LCSSA:%.*]] = phi i32 [ [[MAX]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[MAX_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %spec.select, %loop ]
+  %gep.src = getelementptr inbounds i8, ptr %src, i32 %iv
+  %l = load i8, ptr %gep.src
+  %l.ext = zext i8 %l to i32
+  %spec.select = tail call i32 @llvm.umax.i32(i32 %max, i32 %l.ext)
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %max
+}
+
+define i32 @chained_smax(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_smax(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK:       [[PRED_LOAD_IF]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
+; CHECK:       [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
+; CHECK:       [[PRED_LOAD_IF1]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK:       [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT:    br i1 [[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
+; CHECK:       [[PRED_LOAD_IF3]]:
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE4]]
+; CHECK:       [[PRED_LOAD_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT:    br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
+; CHECK:       [[PRED_LOAD_IF5]]:
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE6]]
+; CHECK:       [[PRED_LOAD_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT:    [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]])
+; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]])
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i32 [[TMP28]]
+;
+entry:
+  br label %loop
+
+loop:                              ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+  %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max)
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %l, i32 %max.1)
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %max.next
+}
+
+define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_invariant_store_user(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 %max.next, ptr %dst, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr2(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr2(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    store i32 0, ptr [[DST]], align 4
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 0, ptr %dst, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr3(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT:    store i32 0, ptr [[DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 0, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 %max.next, ptr %dst, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_different_addr(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_different_addr(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[DST_2:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    store i32 [[MAX_NEXT]], ptr [[DST_2]], align 4
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 %max.next, ptr %dst.2, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define i32 @chained_instructions_feeding_max1(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_instructions_feeding_max1(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MAX]], [[L]]
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 [[L]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:                              ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %add = add i32 %max, %l
+  %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 %l)
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %max.next
+}
+
+define i32 @chained_instructions_feeding_max2(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_instructions_feeding_max2(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT:    [[MAX_1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[MAX]])
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[L]], [[MAX_1]]
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 100)
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:                              ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+  %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max)
+  %l = load i32, ptr %gep.src, align 4
+  %add = add i32 %l, %max.1
+  %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 100)
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %max.next
+}
+
+
+define i32 @test_predicated_smin(ptr %src) {
+; CHECK-LABEL: define i32 @test_predicated_smin(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-NEXT:    [[TMP3:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP3]])
+; CHECK-NEXT:    [[PREDPHI]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[PREDPHI]])
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %min = phi i32 [ 0, %entry ], [ %min.merge, %loop.latch ]
+  %gep.src = getelementptr float, ptr %src, i64 %iv
+  %l = load float, ptr %gep.src, align 4
+  %c = fcmp une float %l, 0.0
+  br i1 %c, label %then, label %loop.latch
+
+then:
+  %div = fdiv float %l, 3.0
+  %div.i32 = fptosi float %div to i32
+  %min.next = tail call i32 @llvm.smin.i32(i32 %min, i32 %div.i32)
+  br label %loop.latch
+
+loop.latch:
+  %min.merge = phi i32 [ %min.next, %then ], [ %min, %loop.header ]
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 111
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret i32 %min.merge
+}
+
+define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) {
+; CHECK-LABEL: define i32 @smax_reduction_multiple_incoming(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[COND]], label %[[LOOP_HEADER_PREHEADER:.*]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    br label %[[LOOP_HEADER_PREHEADER]]
+; CHECK:       [[LOOP_HEADER_PREHEADER]]:
+; CHECK-NEXT:    [[IV_PH:%.*]] = phi i32 [ 10, %[[ELSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[MAX_PH:%.*]] = phi i32 [ 5, %[[ELSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[IV_PH]], [[N_VEC]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MAX_PH]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[IV_PH]], [[INDEX]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    br label %[[LOOP_HEADER:.*]]
+; CHECK:       [[LOOP_HEADER]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ [[MAX_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT:    [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+  br i1 %cond, label %loop.header, label %else
+
+else:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ 10, %else ], [ %iv.next, %loop.header ]
+  %max = phi i32 [ 0, %entry ], [ 5, %else ], [ %max.next, %loop.header ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv, %n
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret i32 %max.next
+}
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index eabb697..3b77e49 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
 ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 
 ; This tests that allocas are not split into slices that are not byte width multiple
+;.
+; CHECK: @foo_copy_source = external constant %union.Foo
+; CHECK: @i64_sink = global i64 0
+;.
 define void @no_split_on_non_byte_width(i32) {
 ; CHECK-LABEL: @no_split_on_non_byte_width(
 ; CHECK-NEXT:    [[ARG_SROA_0:%.*]] = alloca i8, align 8
@@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr)
 
 ; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
 ; vector store, hence accidentally putting gibberish onto the stack.
-define i32 @memcpy_vec3float_widening(ptr %x) {
+define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 {
 ; CHECK-LABEL: @memcpy_vec3float_widening(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-; CHECK-NEXT:    [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
+; CHECK-NEXT:    [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT:    store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
@@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() {
   %L2 = load i1, ptr %A
   ret i1 %L2
 }
+!0 = !{!"function_entry_count", i32 10}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-MODIFY-CFG: {{.*}}
 ; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index de83a0d..4c08b57 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -386,7 +386,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
 
   // Make a DWARF transformer object and populate the ranges of the code
   // so we don't end up adding invalid functions to GSYM data.
-  DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
+  bool IsMachO = dyn_cast<object::MachOObjectFile>(&Obj) != nullptr;
+
+  DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites, IsMachO);
   if (!TextRanges.empty())
     Gsym.SetValidTextRanges(TextRanges);
 
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 33f53de..d560073 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -4899,3 +4899,189 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
   for (const auto &Line : ExpectedDumpLines)
     EXPECT_TRUE(DumpStr.find(Line) != std::string::npos);
 }
+
+TEST(GSYMTest, TestUnableToLocateDWO) {
+  // Test that llvm-gsymutil will not produce "uanble to locate DWO file" for
+  // Apple binaries. Apple uses DW_AT_GNU_dwo_id for non split DWARF purposes
+  // and this makes llvm-gsymutil create warnings and errors.
+  //
+  // 0x0000000b: DW_TAG_compile_unit
+  //               DW_AT_name        ("main.cpp")
+  //               DW_AT_language    (DW_LANG_C)
+  //               DW_AT_GNU_dwo_id  (0xfffffffe)
+  StringRef yamldata = R"(
+  debug_str:
+    - ''
+    - main.cpp
+  debug_abbrev:
+    - ID:              0
+      Table:
+        - Code:            0x1
+          Tag:             DW_TAG_compile_unit
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_language
+              Form:            DW_FORM_udata
+            - Attribute:       DW_AT_GNU_dwo_id
+              Form:            DW_FORM_data4
+  debug_info:
+    - Length:          0x11
+      Version:         4
+      AbbrevTableID:   0
+      AbbrOffset:      0x0
+      AddrSize:        8
+      Entries:
+        - AbbrCode:        0x1
+          Values:
+            - Value:           0x1
+            - Value:           0x2
+            - Value:           0xFFFFFFFE
+  )";
+  auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+  ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+  std::unique_ptr<DWARFContext> DwarfContext =
+      DWARFContext::create(*ErrOrSections, 8);
+  ASSERT_TRUE(DwarfContext.get() != nullptr);
+  std::string errors;
+  raw_string_ostream OS(errors);
+  OutputAggregator OSAgg(&OS);
+  GsymCreator GC;
+  // Make a DWARF transformer that is MachO (Apple) to avoid warnings about
+  // not finding DWO files.
+  DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true);
+  const uint32_t ThreadCount = 1;
+  ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
+  ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded());
+
+  // Make sure this warning is not in the binary
+  std::string warn("warning: Unable to retrieve DWO .debug_info section for");
+  EXPECT_TRUE(errors.find(warn) == std::string::npos);
+}
+
+TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
+  // Test that if llvm-gsymutil finds a line table for a compile unit and if
+  // there are no matching entries for a function in that compile unit, that
+  // it doesn't print out a error saying that a DIE has an invalid file index
+  // if there is no DW_AT_decl_file attribute.
+  //
+  // 0x0000000b: DW_TAG_compile_unit
+  //               DW_AT_name        ("main.cpp")
+  //               DW_AT_language    (DW_LANG_C)
+  //               DW_AT_stmt_list   (0x00000000)
+  //
+  // 0x00000015:   DW_TAG_subprogram
+  //                 DW_AT_name      ("foo")
+  //                 DW_AT_low_pc    (0x0000000000001000)
+  //                 DW_AT_high_pc   (0x0000000000001050)
+  //
+  // 0x0000002a:   NULL
+  //
+  // Line table that has entries, but none that match "foo":
+  //
+  // Address            Line   Column File   ISA Discriminator OpIndex Flags
+  // ------------------ ------ ------ ------ --- ------------- ------- -----
+  // 0x0000000000002000     10      0      1   0             0       0 is_stmt
+  // 0x0000000000002050     13      0      1   0             0       0 is_stmt
+
+  StringRef yamldata = R"(
+  debug_str:
+    - ''
+    - main.cpp
+  debug_abbrev:
+    - ID:              0
+      Table:
+        - Code:            0x1
+          Tag:             DW_TAG_compile_unit
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_language
+              Form:            DW_FORM_udata
+            - Attribute:       DW_AT_stmt_list
+              Form:            DW_FORM_sec_offset
+        - Code:            0x2
+          Tag:             DW_TAG_subprogram
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_string
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_addr
+  debug_info:
+    - Length:          0x27
+      Version:         4
+      AbbrevTableID:   0
+      AbbrOffset:      0x0
+      AddrSize:        8
+      Entries:
+        - AbbrCode:        0x1
+          Values:
+            - Value:           0x1
+            - Value:           0x2
+            - Value:           0x0
+        - AbbrCode:        0x2
+          Values:
+            - Value:           0xDEADBEEFDEADBEEF
+              CStr:            foo
+            - Value:           0x1000
+            - Value:           0x1050
+        - AbbrCode:        0x0
+  debug_line:
+    - Length:          58
+      Version:         2
+      PrologueLength:  31
+      MinInstLength:   1
+      DefaultIsStmt:   1
+      LineBase:        251
+      LineRange:       14
+      OpcodeBase:      13
+      StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+      Files:
+        - Name:            main.cpp
+          DirIdx:          0
+          ModTime:         0
+          Length:          0
+      Opcodes:
+        - Opcode:          DW_LNS_extended_op
+          ExtLen:          9
+          SubOpcode:       DW_LNE_set_address
+          Data:            8192
+        - Opcode:          DW_LNS_advance_line
+          SData:           9
+          Data:            0
+        - Opcode:          DW_LNS_copy
+          Data:            0
+        - Opcode:          DW_LNS_advance_pc
+          Data:            80
+        - Opcode:          DW_LNS_advance_line
+          SData:           3
+          Data:            0
+        - Opcode:          DW_LNS_extended_op
+          ExtLen:          1
+          SubOpcode:       DW_LNE_end_sequence
+          Data:            0
+  )";
+  auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
+  ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
+  std::unique_ptr<DWARFContext> DwarfContext =
+      DWARFContext::create(*ErrOrSections, 8);
+  ASSERT_TRUE(DwarfContext.get() != nullptr);
+  std::string errors;
+  raw_string_ostream OS(errors);
+  OutputAggregator OSAgg(&OS);
+  GsymCreator GC;
+  DwarfTransformer DT(*DwarfContext, GC);
+  const uint32_t ThreadCount = 1;
+  ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
+  ASSERT_THAT_ERROR(GC.finalize(OSAgg), Succeeded());
+
+  // Make sure this warning is not in the binary
+  std::string error_str("error: function DIE at 0x00000015 has an invalid file "
+                        "index 4294967295 in its DW_AT_decl_file attribute");
+  EXPECT_TRUE(errors.find(error_str) == std::string::npos);
+}
diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
index b4d816e..3c6ff11 100644
--- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp
@@ -266,10 +266,9 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
     return;
 
   StringRef Lang = DirLang.getName();
+  IncludeGuardEmitter IncGuard(OS, (Twine("LLVM_") + Lang + "_INC").str());
 
-  OS << "#ifndef LLVM_" << Lang << "_INC\n";
-  OS << "#define LLVM_" << Lang << "_INC\n";
-  OS << "\n#include \"llvm/ADT/ArrayRef.h\"\n";
+  OS << "#include \"llvm/ADT/ArrayRef.h\"\n";
 
   if (DirLang.hasEnableBitmaskEnumInNamespace())
     OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n";
@@ -370,7 +369,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
     OS << "};\n";
   }
   LlvmNS.close();
-  OS << "#endif // LLVM_" << Lang << "_INC\n";
 }
 
 // Given a list of spellings (for a given clause/directive), order them
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index a5c5426..3f8be5e 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -1310,14 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
 Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SROA/phi-gep.ll
-Transforms/SROA/scalable-vectors-with-known-vscale.ll
-Transforms/SROA/select-gep.ll
-Transforms/SROA/select-load.ll
-Transforms/SROA/slice-width.ll
-Transforms/SROA/vector-conversion.ll
-Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
-Transforms/SROA/vector-promotion.ll
 Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
 Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
 Transforms/StructurizeCFG/hoist-zerocost.ll
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index c477c6c..dcc1ef9 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -315,7 +315,8 @@ bool mlir::linalg::detail::isContractionBody(
 
   Value yielded = getSourceSkipUnary(terminator->getOperand(0));
   Operation *reductionOp = yielded.getDefiningOp();
-  if (reductionOp->getNumResults() != 1 || reductionOp->getNumOperands() != 2) {
+  if (!reductionOp || reductionOp->getNumResults() != 1 ||
+      reductionOp->getNumOperands() != 2) {
     errs << "expected reduction op to be binary";
     return false;
   }
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index 618ba34..66cae5c 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } {
     } -> tensor<1x1x4xf32>
     return
   }
+
+  func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) {
+    %0 = linalg.generic {
+      indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+                       affine_map<(d0, d1, d2) -> (d2, d1)>,
+                       affine_map<(d0, d1, d2) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel", "reduction"]}
+      ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>)
+      outs(%arg2 : tensor<128x128xi32>) {
+      ^bb0(%in: i32, %in_0: i32, %out: i32):
+        linalg.yield %out : i32
+      } -> tensor<128x128xi32>
+    return
+  }
 }
 
 // -----
diff --git a/orc-rt/include/orc-rt/ExecutorAddress.h b/orc-rt/include/orc-rt/ExecutorAddress.h
index cc7bbf5..6ec583f 100644
--- a/orc-rt/include/orc-rt/ExecutorAddress.h
+++ b/orc-rt/include/orc-rt/ExecutorAddress.h
@@ -204,6 +204,9 @@ struct ExecutorAddrRange {
   constexpr bool contains(ExecutorAddr Addr) const noexcept {
     return Start <= Addr && Addr < End;
   }
+  constexpr bool contains(const ExecutorAddrRange &Other) const noexcept {
+    return (Other.Start >= Start && Other.End <= End);
+  }
   constexpr bool overlaps(const ExecutorAddrRange &Other) const noexcept {
     return !(Other.End <= Start || End <= Other.Start);
   }
diff --git a/orc-rt/unittests/ExecutorAddressTest.cpp b/orc-rt/unittests/ExecutorAddressTest.cpp
index 98074a7..2e04901 100644
--- a/orc-rt/unittests/ExecutorAddressTest.cpp
+++ b/orc-rt/unittests/ExecutorAddressTest.cpp
@@ -97,10 +97,16 @@ TEST(ExecutorAddrTest, AddrRanges) {
   EXPECT_FALSE(R1.contains(A0));
   EXPECT_FALSE(R1.contains(A2));
 
+  EXPECT_TRUE(R3.contains(R0));  // True for singleton range at start.
+  EXPECT_TRUE(R3.contains(R1));  // True for singleton range at end.
+  EXPECT_FALSE(R3.contains(R2)); // False for non-overlaping singleton range.
+  EXPECT_FALSE(R3.contains(R4)); // False for overlapping, uncontained range.
+
   EXPECT_FALSE(R1.overlaps(R0));
   EXPECT_FALSE(R1.overlaps(R2));
   EXPECT_TRUE(R1.overlaps(R3));
   EXPECT_TRUE(R1.overlaps(R4));
+  EXPECT_TRUE(R3.overlaps(R4));
 }
 
 TEST(ExecutorAddrTest, Hashable) {
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c83ab59..74d632b 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1412,6 +1412,13 @@ td_library(
 )
 
 td_library(
+    name = "InferStridedMetadataInterfaceTdFiles",
+    srcs = ["include/mlir/Interfaces/InferStridedMetadataInterface.td"],
+    includes = ["include"],
+    deps = [":OpBaseTdFiles"],
+)
+
+td_library(
     name = "InferTypeOpInterfaceTdFiles",
     srcs = ["include/mlir/Interfaces/InferTypeOpInterface.td"],
     includes = ["include"],
@@ -4159,6 +4166,7 @@ cc_library(
         ":MathToLibm",
         ":MathToROCDL",
         ":MathToSPIRV",
+        ":MathToXeVM",
         ":MemRefToEmitC",
         ":MemRefToLLVM",
         ":MemRefToSPIRV",
@@ -7049,6 +7057,33 @@ cc_library(
 )
 
 cc_library(
+    name = "MathToXeVM",
+    srcs = glob([
+        "lib/Conversion/MathToXeVM/*.cpp",
+    ]),
+    hdrs = glob([
+        "include/mlir/Conversion/MathToXeVM/*.h",
+    ]),
+    includes = [
+        "include",
+        "lib/Conversion/MathToXeVM",
+    ],
+    deps = [
+        ":ArithAttrToLLVMConversion",
+        ":ArithDialect",
+        ":ConversionPassIncGen",
+        ":IR",
+        ":LLVMCommonConversion",
+        ":LLVMDialect",
+        ":MathDialect",
+        ":Pass",
+        ":Transforms",
+        ":XeVMDialect",
+        "//llvm:Support",
+    ],
+)
+
+cc_library(
     name = "FuncToEmitC",
     srcs = glob([
         "lib/Conversion/FuncToEmitC/*.cpp",
@@ -7622,6 +7657,30 @@ cc_library(
     ],
 )
 
+gentbl_cc_library(
+    name = "InferStridedMetadataInterfaceIncGen",
+    tbl_outs = {
+        "include/mlir/Interfaces/InferStridedMetadataInterface.h.inc": ["-gen-op-interface-decls"],
+        "include/mlir/Interfaces/InferStridedMetadataInterface.cpp.inc": ["-gen-op-interface-defs"],
+    },
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Interfaces/InferStridedMetadataInterface.td",
+    deps = [":InferStridedMetadataInterfaceTdFiles"],
+)
+
+cc_library(
+    name = "InferStridedMetadataInterface",
+    srcs = ["lib/Interfaces/InferStridedMetadataInterface.cpp"],
+    hdrs = ["include/mlir/Interfaces/InferStridedMetadataInterface.h"],
+    includes = ["include"],
+    deps = [
+        ":IR",
+        ":InferIntRangeInterface",
+        ":InferStridedMetadataInterfaceIncGen",
+        "//llvm:Support",
+    ],
+)
+
 td_library(
     name = "DataLayoutInterfacesTdFiles",
     srcs = ["include/mlir/Interfaces/DataLayoutInterfaces.td"],
@@ -8550,9 +8609,11 @@ cc_library(
         ":CallOpInterfaces",
         ":ControlFlowInterfaces",
         ":DataLayoutInterfaces",
+        ":DialectUtils",
         ":FunctionInterfaces",
         ":IR",
         ":InferIntRangeInterface",
+        ":InferStridedMetadataInterface",
         ":LoopLikeInterface",
         ":Pass",
         ":SideEffectInterfaces",
@@ -12474,6 +12535,7 @@ cc_library(
         ":IR",
         ":InferIntRangeCommon",
         ":InferIntRangeInterface",
+        ":InferStridedMetadataInterface",
         ":InferTypeOpInterface",
         ":InliningUtils",
         ":Pass",
@@ -12695,6 +12757,7 @@ td_library(
         ":ArithOpsTdFiles",
         ":CastInterfacesTdFiles",
         ":ControlFlowInterfacesTdFiles",
+        ":InferStridedMetadataInterfaceTdFiles",
         ":MemOpInterfacesTdFiles",
         ":MemorySlotInterfacesTdFiles",
         ":OpBaseTdFiles",
@@ -12785,6 +12848,7 @@ cc_library(
         ":IR",
         ":InferIntRangeCommon",
         ":InferIntRangeInterface",
+        ":InferStridedMetadataInterface",
         ":InferTypeOpInterface",
         ":InliningUtils",
         ":MemOpInterfaces",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel
index b00e8f2..d8fcb53 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/Conversion/BUILD.bazel
@@ -1,4 +1,5 @@
 load("//llvm:lit_test.bzl", "lit_test")
+load("//llvm:targets.bzl", "llvm_targets")
 
 licenses(["notice"])
 
@@ -15,6 +16,9 @@ package(default_visibility = ["//visibility:public"])
     )
     for src in glob(
         include = ["**/*.mlir"],
-        exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"],
+        exclude = ["GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir"] + (
+            # MathToXeVM needs SPIRV; see MathToXeVM/lit.local.cfg
+            ["MathToXeVM/**"] if "SPIRV" not in llvm_targets else []
+        ),
     )
 ]