author     Qinkun Bao <qinkun@google.com>          2025-06-04 00:09:20 +0000
committer  Qinkun Bao <qinkun@google.com>          2025-06-04 00:09:20 +0000
commit     4b7b3fcc7e5a2c85607d5445eece319d7a1a22c9 (patch)
tree       03fb70d0af5bee0a070fe712637f25e606cdb752
parent     50f9b8acafdca48e87e6b8e393c1f116a2d193ee (diff)
parent     2ff2a076cc089f0c977ce7aea231ee5541879f8a (diff)
[spr] changes introduced through rebase (users/qinkunbao/spr/main.sanitizerdocnfi-update-the-doc-for-prefixsanitize)
Created using spr 1.3.6
[skip ci]
67 files changed, 1434 insertions, 903 deletions
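
Note for readers skimming the patch: among the changes below, llvm-libc gains a wcsncpy entrypoint (libc/src/wchar/wcsncpy.cpp). The C-standard contract it implements is: copy at most n wide characters, stop at the source terminator, then zero-fill the remainder; if the source is at least n characters long, no terminator is written. A small standalone sketch of that contract against the hosted <cwchar> API (illustrative only, not part of the patch):

    #include <cassert>
    #include <cwchar>

    int main() {
      // Source shorter than n: the tail is zero-filled (this corresponds to
      // the second loop in the new implementation below).
      wchar_t dest[6];
      std::wcsncpy(dest, L"ab", 6);
      assert(dest[0] == L'a' && dest[1] == L'b' && dest[5] == L'\0');

      // n smaller than the source length: no terminator is written, so the
      // destination is not a string until one is appended.
      wchar_t tight[3];
      std::wcsncpy(tight, L"abcdef", 3);
      assert(tight[2] == L'c');
      return 0;
    }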
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 87bb9df..43c28c8 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4579,7 +4579,6 @@ class ShuffleVectorExpr : public Expr {
   // indices. The number of values in this list is always
   // 2+the number of indices in the vector type.
   Stmt **SubExprs;
-  unsigned NumExprs;
 
 public:
   ShuffleVectorExpr(const ASTContext &C, ArrayRef<Expr*> args, QualType Type,
@@ -4605,25 +4604,28 @@ public:
   /// getNumSubExprs - Return the size of the SubExprs array. This includes the
   /// constant expression, the actual arguments passed in, and the function
   /// pointers.
-  unsigned getNumSubExprs() const { return NumExprs; }
+  unsigned getNumSubExprs() const { return ShuffleVectorExprBits.NumExprs; }
 
   /// Retrieve the array of expressions.
   Expr **getSubExprs() { return reinterpret_cast<Expr **>(SubExprs); }
 
   /// getExpr - Return the Expr at the specified index.
   Expr *getExpr(unsigned Index) {
-    assert((Index < NumExprs) && "Arg access out of range!");
+    assert((Index < ShuffleVectorExprBits.NumExprs) &&
+           "Arg access out of range!");
     return cast<Expr>(SubExprs[Index]);
   }
 
   const Expr *getExpr(unsigned Index) const {
-    assert((Index < NumExprs) && "Arg access out of range!");
+    assert((Index < ShuffleVectorExprBits.NumExprs) &&
+           "Arg access out of range!");
     return cast<Expr>(SubExprs[Index]);
   }
 
   void setExprs(const ASTContext &C, ArrayRef<Expr *> Exprs);
 
   llvm::APSInt getShuffleMaskIdx(unsigned N) const {
-    assert((N < NumExprs - 2) && "Shuffle idx out of range!");
+    assert((N < ShuffleVectorExprBits.NumExprs - 2) &&
+           "Shuffle idx out of range!");
     assert(isa<ConstantExpr>(getExpr(N + 2)) &&
            "Index expression must be a ConstantExpr");
     return cast<ConstantExpr>(getExpr(N + 2))->getAPValueResult().getInt();
@@ -4631,10 +4633,12 @@ public:
 
   // Iterators
   child_range children() {
-    return child_range(&SubExprs[0], &SubExprs[0]+NumExprs);
+    return child_range(&SubExprs[0],
+                       &SubExprs[0] + ShuffleVectorExprBits.NumExprs);
   }
   const_child_range children() const {
-    return const_child_range(&SubExprs[0], &SubExprs[0] + NumExprs);
+    return const_child_range(&SubExprs[0],
+                             &SubExprs[0] + ShuffleVectorExprBits.NumExprs);
   }
 };
 
@@ -4776,13 +4780,13 @@ class ChooseExpr : public Expr {
   enum { COND, LHS, RHS, END_EXPR };
   Stmt* SubExprs[END_EXPR]; // Left/Middle/Right hand sides.
   SourceLocation BuiltinLoc, RParenLoc;
-  bool CondIsTrue;
+
 public:
   ChooseExpr(SourceLocation BLoc, Expr *cond, Expr *lhs, Expr *rhs, QualType t,
              ExprValueKind VK, ExprObjectKind OK, SourceLocation RP,
             bool condIsTrue)
-      : Expr(ChooseExprClass, t, VK, OK), BuiltinLoc(BLoc), RParenLoc(RP),
-        CondIsTrue(condIsTrue) {
+      : Expr(ChooseExprClass, t, VK, OK), BuiltinLoc(BLoc), RParenLoc(RP) {
+    ChooseExprBits.CondIsTrue = condIsTrue;
     SubExprs[COND] = cond;
     SubExprs[LHS] = lhs;
     SubExprs[RHS] = rhs;
@@ -4798,9 +4802,9 @@ public:
   bool isConditionTrue() const {
     assert(!isConditionDependent() &&
            "Dependent condition isn't true or false");
-    return CondIsTrue;
+    return ChooseExprBits.CondIsTrue;
   }
-  void setIsConditionTrue(bool isTrue) { CondIsTrue = isTrue; }
+  void setIsConditionTrue(bool isTrue) { ChooseExprBits.CondIsTrue = isTrue; }
 
   bool isConditionDependent() const {
     return getCond()->isTypeDependent() || getCond()->isValueDependent();
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 6ed049c..4c9636f 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -2974,10 +2974,6 @@ public:
 ///   __array_extent(int[10][20], 1) == 20
 /// \endcode
 class ArrayTypeTraitExpr : public Expr {
-  /// The trait. An ArrayTypeTrait enum in MSVC compat unsigned.
-  LLVM_PREFERRED_TYPE(ArrayTypeTrait)
-  unsigned ATT : 2;
-
   /// The value of the type trait. Unspecified if dependent.
   uint64_t Value = 0;
 
@@ -2999,21 +2995,27 @@ public:
   ArrayTypeTraitExpr(SourceLocation loc, ArrayTypeTrait att,
                      TypeSourceInfo *queried, uint64_t value, Expr *dimension,
                      SourceLocation rparen, QualType ty)
-      : Expr(ArrayTypeTraitExprClass, ty, VK_PRValue, OK_Ordinary), ATT(att),
+      : Expr(ArrayTypeTraitExprClass, ty, VK_PRValue, OK_Ordinary),
         Value(value), Dimension(dimension), Loc(loc), RParen(rparen),
         QueriedType(queried) {
     assert(att <= ATT_Last && "invalid enum value!");
-    assert(static_cast<unsigned>(att) == ATT && "ATT overflow!");
+    ArrayTypeTraitExprBits.ATT = att;
+    assert(static_cast<unsigned>(att) == ArrayTypeTraitExprBits.ATT &&
+           "ATT overflow!");
     setDependence(computeDependence(this));
   }
 
   explicit ArrayTypeTraitExpr(EmptyShell Empty)
-      : Expr(ArrayTypeTraitExprClass, Empty), ATT(0) {}
+      : Expr(ArrayTypeTraitExprClass, Empty) {
+    ArrayTypeTraitExprBits.ATT = 0;
+  }
 
   SourceLocation getBeginLoc() const LLVM_READONLY { return Loc; }
   SourceLocation getEndLoc() const LLVM_READONLY { return RParen; }
 
-  ArrayTypeTrait getTrait() const { return static_cast<ArrayTypeTrait>(ATT); }
+  ArrayTypeTrait getTrait() const {
+    return static_cast<ArrayTypeTrait>(ArrayTypeTraitExprBits.ATT);
+  }
 
   QualType getQueriedType() const { return QueriedType->getType(); }
 
@@ -3045,14 +3047,6 @@ public:
 ///   __is_lvalue_expr(1) == false
 /// \endcode
 class ExpressionTraitExpr : public Expr {
-  /// The trait. A ExpressionTrait enum in MSVC compatible unsigned.
-  LLVM_PREFERRED_TYPE(ExpressionTrait)
-  unsigned ET : 31;
-
-  /// The value of the type trait. Unspecified if dependent.
-  LLVM_PREFERRED_TYPE(bool)
-  unsigned Value : 1;
-
   /// The location of the type trait keyword.
   SourceLocation Loc;
 
@@ -3068,24 +3062,32 @@ public:
   ExpressionTraitExpr(SourceLocation loc, ExpressionTrait et, Expr *queried,
                       bool value, SourceLocation rparen, QualType resultType)
       : Expr(ExpressionTraitExprClass, resultType, VK_PRValue, OK_Ordinary),
-        ET(et), Value(value), Loc(loc), RParen(rparen),
-        QueriedExpression(queried) {
+        Loc(loc), RParen(rparen), QueriedExpression(queried) {
+    ExpressionTraitExprBits.ET = et;
+    ExpressionTraitExprBits.Value = value;
+
     assert(et <= ET_Last && "invalid enum value!");
-    assert(static_cast<unsigned>(et) == ET && "ET overflow!");
+    assert(static_cast<unsigned>(et) == ExpressionTraitExprBits.ET &&
+           "ET overflow!");
     setDependence(computeDependence(this));
   }
 
   explicit ExpressionTraitExpr(EmptyShell Empty)
-      : Expr(ExpressionTraitExprClass, Empty), ET(0), Value(false) {}
+      : Expr(ExpressionTraitExprClass, Empty) {
+    ExpressionTraitExprBits.ET = 0;
+    ExpressionTraitExprBits.Value = false;
+  }
 
   SourceLocation getBeginLoc() const LLVM_READONLY { return Loc; }
   SourceLocation getEndLoc() const LLVM_READONLY { return RParen; }
 
-  ExpressionTrait getTrait() const { return static_cast<ExpressionTrait>(ET); }
+  ExpressionTrait getTrait() const {
+    return static_cast<ExpressionTrait>(ExpressionTraitExprBits.ET);
+  }
 
   Expr *getQueriedExpression() const { return QueriedExpression; }
 
-  bool getValue() const { return Value; }
+  bool getValue() const { return ExpressionTraitExprBits.Value; }
 
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == ExpressionTraitExprClass;
@@ -4506,22 +4508,15 @@ class PackIndexingExpr final
   // The pack being indexed, followed by the index
   Stmt *SubExprs[2];
 
-  // The size of the trailing expressions.
-  unsigned TransformedExpressions : 31;
-
-  LLVM_PREFERRED_TYPE(bool)
-  unsigned FullySubstituted : 1;
-
   PackIndexingExpr(QualType Type, SourceLocation EllipsisLoc,
                    SourceLocation RSquareLoc, Expr *PackIdExpr,
                    Expr *IndexExpr, ArrayRef<Expr *> SubstitutedExprs = {},
                    bool FullySubstituted = false)
       : Expr(PackIndexingExprClass, Type, VK_LValue, OK_Ordinary),
         EllipsisLoc(EllipsisLoc), RSquareLoc(RSquareLoc),
-        SubExprs{PackIdExpr, IndexExpr},
-        TransformedExpressions(SubstitutedExprs.size()),
-        FullySubstituted(FullySubstituted) {
-
+        SubExprs{PackIdExpr, IndexExpr} {
+    PackIndexingExprBits.TransformedExpressions = SubstitutedExprs.size();
+    PackIndexingExprBits.FullySubstituted = FullySubstituted;
     auto *Exprs = getTrailingObjects<Expr *>();
     llvm::uninitialized_copy(SubstitutedExprs, Exprs);
 
@@ -4534,7 +4529,7 @@ class PackIndexingExpr final
   PackIndexingExpr(EmptyShell Empty) : Expr(PackIndexingExprClass, Empty) {}
 
   unsigned numTrailingObjects(OverloadToken<Expr *>) const {
-    return TransformedExpressions;
+    return PackIndexingExprBits.TransformedExpressions;
   }
 
 public:
@@ -4548,11 +4543,14 @@ public:
                                unsigned NumTransformedExprs);
 
   // The index expression and all elements of the pack have been substituted.
-  bool isFullySubstituted() const { return FullySubstituted; }
+  bool isFullySubstituted() const {
+    return PackIndexingExprBits.FullySubstituted;
+  }
 
   /// Determine if the expression was expanded to empty.
   bool expandsToEmptyPack() const {
-    return isFullySubstituted() && TransformedExpressions == 0;
+    return isFullySubstituted() &&
+           PackIndexingExprBits.TransformedExpressions == 0;
   }
 
   /// Determine the location of the 'sizeof' keyword.
@@ -4590,7 +4588,8 @@ public:
   /// Return the trailing expressions, regardless of the expansion.
   ArrayRef<Expr *> getExpressions() const {
-    return {getTrailingObjects<Expr *>(), TransformedExpressions};
+    return {getTrailingObjects<Expr *>(),
+            PackIndexingExprBits.TransformedExpressions};
   }
 
   static bool classof(const Stmt *T) {
@@ -4988,7 +4987,6 @@ class CXXFoldExpr : public Expr {
   // than the number of expansions.
   UnsignedOrNone NumExpansions = std::nullopt;
   Stmt *SubExprs[SubExpr::Count];
-  BinaryOperatorKind Opcode;
 
 public:
   CXXFoldExpr(QualType T, UnresolvedLookupExpr *Callee,
@@ -5021,7 +5019,7 @@ public:
   SourceLocation getLParenLoc() const { return LParenLoc; }
   SourceLocation getRParenLoc() const { return RParenLoc; }
   SourceLocation getEllipsisLoc() const { return EllipsisLoc; }
-  BinaryOperatorKind getOperator() const { return Opcode; }
+  BinaryOperatorKind getOperator() const { return CXXFoldExprBits.Opcode; }
 
   UnsignedOrNone getNumExpansions() const { return NumExpansions; }
 
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 3be25c7..48a6aea 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -19,6 +19,7 @@
 #include "clang/AST/OperationKinds.h"
 #include "clang/AST/StmtIterator.h"
 #include "clang/Basic/CapturedStmt.h"
+#include "clang/Basic/ExpressionTraits.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/Lambda.h"
@@ -736,6 +737,15 @@ protected:
     unsigned ProducedByFoldExpansion : 1;
   };
 
+  class ShuffleVectorExprBitfields {
+    friend class ShuffleVectorExpr;
+
+    LLVM_PREFERRED_TYPE(ExprBitfields)
+    unsigned : NumExprBits;
+
+    unsigned NumExprs;
+  };
+
   class StmtExprBitfields {
     friend class ASTStmtReader;
     friend class StmtExpr;
@@ -749,6 +759,17 @@ protected:
     unsigned TemplateDepth;
   };
 
+  class ChooseExprBitfields {
+    friend class ASTStmtReader;
+    friend class ChooseExpr;
+
+    LLVM_PREFERRED_TYPE(ExprBitfields)
+    unsigned : NumExprBits;
+
+    LLVM_PREFERRED_TYPE(bool)
+    bool CondIsTrue : 1;
+  };
+
   //===--- C++ Expression bitfields classes ---===//
 
   class CXXOperatorCallExprBitfields {
@@ -1184,6 +1205,57 @@ protected:
     SourceLocation RequiresKWLoc;
   };
 
+  class ArrayTypeTraitExprBitfields {
+    friend class ArrayTypeTraitExpr;
+    friend class ASTStmtReader;
+
+    LLVM_PREFERRED_TYPE(ExprBitfields)
+    unsigned : NumExprBits;
+
+    /// The trait. An ArrayTypeTrait enum in MSVC compat unsigned.
+    LLVM_PREFERRED_TYPE(ArrayTypeTrait)
+    unsigned ATT : 2;
+  };
+
+  class ExpressionTraitExprBitfields {
+    friend class ExpressionTraitExpr;
+    friend class ASTStmtReader;
+
+    LLVM_PREFERRED_TYPE(ExprBitfields)
+    unsigned : NumExprBits;
+
+    /// The trait. A ExpressionTrait enum in MSVC compatible unsigned.
+    LLVM_PREFERRED_TYPE(ExpressionTrait)
+    unsigned ET : 31;
+
+    /// The value of the type trait. Unspecified if dependent.
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned Value : 1;
+  };
+
+  class CXXFoldExprBitfields {
+    friend class CXXFoldExpr;
+    friend class ASTStmtReader;
+    friend class ASTStmtWriter;
+
+    LLVM_PREFERRED_TYPE(ExprBitfields)
+    unsigned : NumExprBits;
+
+    BinaryOperatorKind Opcode;
+  };
+
+  class PackIndexingExprBitfields {
+    friend class PackIndexingExpr;
+    friend class ASTStmtWriter;
+    friend class ASTStmtReader;
+
+    LLVM_PREFERRED_TYPE(ExprBitfields)
+    unsigned : NumExprBits;
+
+    // The size of the trailing expressions.
+    unsigned TransformedExpressions : 31;
+
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned FullySubstituted : 1;
+  };
+
   //===--- C++ Coroutines bitfields classes ---===//
 
   class CoawaitExprBitfields {
@@ -1279,9 +1351,11 @@ protected:
     PseudoObjectExprBitfields PseudoObjectExprBits;
     SourceLocExprBitfields SourceLocExprBits;
     ParenExprBitfields ParenExprBits;
+    ShuffleVectorExprBitfields ShuffleVectorExprBits;
 
     // GNU Extensions.
     StmtExprBitfields StmtExprBits;
+    ChooseExprBitfields ChooseExprBits;
 
     // C++ Expressions
     CXXOperatorCallExprBitfields CXXOperatorCallExprBits;
@@ -1308,6 +1382,10 @@ protected:
     SubstNonTypeTemplateParmExprBitfields SubstNonTypeTemplateParmExprBits;
     LambdaExprBitfields LambdaExprBits;
     RequiresExprBitfields RequiresExprBits;
+    ArrayTypeTraitExprBitfields ArrayTypeTraitExprBits;
+    ExpressionTraitExprBitfields ExpressionTraitExprBits;
+    CXXFoldExprBitfields CXXFoldExprBits;
+    PackIndexingExprBitfields PackIndexingExprBits;
 
     // C++ Coroutines expressions
     CoawaitExprBitfields CoawaitBits;
diff --git a/clang/include/clang/AST/StmtCXX.h b/clang/include/clang/AST/StmtCXX.h
index 8b4ef24..a15a445 100644
--- a/clang/include/clang/AST/StmtCXX.h
+++ b/clang/include/clang/AST/StmtCXX.h
@@ -133,11 +133,11 @@ public:
 /// analysis of the constituent components. The original syntactic components
 /// can be extracted using getLoopVariable and getRangeInit.
 class CXXForRangeStmt : public Stmt {
-  SourceLocation ForLoc;
   enum { INIT, RANGE, BEGINSTMT, ENDSTMT, COND, INC, LOOPVAR, BODY, END };
   // SubExprs[RANGE] is an expression or declstmt.
   // SubExprs[COND] and SubExprs[INC] are expressions.
   Stmt *SubExprs[END];
+  SourceLocation ForLoc;
   SourceLocation CoawaitLoc;
   SourceLocation ColonLoc;
   SourceLocation RParenLoc;
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index be75b9e..beda73e 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -813,19 +813,19 @@ changes to one object won't affect the others, the object's initializer will
 run once per copy, etc.
 
 Specifically, this warning fires when it detects an object which:
-  1. Is defined as ``inline`` in a header file (so it might get compiled into multiple libaries), and
-  2. Has external linkage (otherwise it's supposed to be duplicated), and
-  3. Has hidden visibility.
+1. Is defined as ``inline`` in a header file (so it might get compiled into multiple libaries), and
+2. Has external linkage (otherwise it's supposed to be duplicated), and
+3. Has hidden visibility.
 
 As well as one of the following:
-  1. The object is mutable, or
-  2. The object's initializer definitely has side effects.
+1. The object is mutable, or
+2. The object's initializer definitely has side effects.
 
 The warning can be resolved by removing one of the conditions above. In rough
 order of preference, this may be done by:
-  1. Marking the object ``const`` (if possible)
-  2. Moving the object's definition to a source file
-  3. Giving the object non-hidden visibility, e.g. using ``__attribute((visibility("default")))``.
+1. Marking the object ``const`` (if possible)
+2. Moving the object's definition to a source file
+3. Giving the object non-hidden visibility, e.g. using ``__attribute((visibility("default")))``.
 
 Note that for (2), all levels of a pointer variable must be constant;
 ``const int*`` will trigger the warning because the pointer itself is mutable.
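
Aside: the Stmt.h hunks above all follow one pattern — each node's flags move out of the node class into a *Bitfields class whose first member is an unnamed bitfield of width NumExprBits, so the new fields pack into the same word(s) as the inherited Expr bits, and the class is added to the anonymous union in Stmt. A minimal sketch of the layout trick (simplified, hypothetical names, not the real Clang declarations):

    class Node {
    protected:
      class BaseBits {
        unsigned Kind : 8; // bits owned by the base class
      };
      enum { NumBaseBits = 8 };

      class WidgetBits {
        friend class Widget;
        unsigned : NumBaseBits; // skip past the base class's bits
        unsigned Flag : 1;      // state that used to be a member of Widget
      };

      union {
        BaseBits Bits;
        WidgetBits WBits; // every derived node's bits share this storage
      };
    };

    class Widget : public Node {
    public:
      bool flag() const { return WBits.Flag; } // replaces a dedicated member
    };

The payoff is that a node such as ChooseExpr no longer spends a whole word on one bool; its flag rides in otherwise-unused padding of the Stmt header.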
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 3993db8..17d2cb4 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4393,7 +4393,8 @@ ShuffleVectorExpr::ShuffleVectorExpr(const ASTContext &C, ArrayRef<Expr *> args,
                                      QualType Type, SourceLocation BLoc,
                                      SourceLocation RP)
     : Expr(ShuffleVectorExprClass, Type, VK_PRValue, OK_Ordinary),
-      BuiltinLoc(BLoc), RParenLoc(RP), NumExprs(args.size()) {
+      BuiltinLoc(BLoc), RParenLoc(RP) {
+  ShuffleVectorExprBits.NumExprs = args.size();
   SubExprs = new (C) Stmt*[args.size()];
   for (unsigned i = 0; i != args.size(); i++)
     SubExprs[i] = args[i];
@@ -4404,8 +4405,8 @@ ShuffleVectorExpr::ShuffleVectorExpr(const ASTContext &C, ArrayRef<Expr *> args,
 void ShuffleVectorExpr::setExprs(const ASTContext &C, ArrayRef<Expr *> Exprs) {
   if (SubExprs) C.Deallocate(SubExprs);
 
-  this->NumExprs = Exprs.size();
-  SubExprs = new (C) Stmt*[NumExprs];
+  this->ShuffleVectorExprBits.NumExprs = Exprs.size();
+  SubExprs = new (C) Stmt *[ShuffleVectorExprBits.NumExprs];
   memcpy(SubExprs, Exprs.data(), sizeof(Expr *) * Exprs.size());
 }
 
diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp
index 5c712e1..bd43ed5 100644
--- a/clang/lib/AST/ExprCXX.cpp
+++ b/clang/lib/AST/ExprCXX.cpp
@@ -2003,7 +2003,8 @@ CXXFoldExpr::CXXFoldExpr(QualType T, UnresolvedLookupExpr *Callee,
                          UnsignedOrNone NumExpansions)
     : Expr(CXXFoldExprClass, T, VK_PRValue, OK_Ordinary),
       LParenLoc(LParenLoc), EllipsisLoc(EllipsisLoc), RParenLoc(RParenLoc),
-      NumExpansions(NumExpansions), Opcode(Opcode) {
+      NumExpansions(NumExpansions) {
+  CXXFoldExprBits.Opcode = Opcode;
   // We rely on asserted invariant to distinguish left and right folds.
   assert(((LHS && LHS->containsUnexpandedParameterPack()) !=
           (RHS && RHS->containsUnexpandedParameterPack())) &&
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 290521a..d633902 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -2828,6 +2828,13 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
     break;
 #include "clang/Basic/HLSLIntangibleTypes.def"
 
+#define SVE_TYPE(Name, Id, SingletonId)                                        \
+  case BuiltinType::Id:                                                        \
+    mangleArtificialTagType(TagTypeKind::Struct, #Name, {"__clang"});          \
+    break;
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits)
+#include "clang/Basic/AArch64ACLETypes.def"
+
   // Issue an error for any type not explicitly handled.
   default:
     Error(Range.getBegin(), "built-in type: ",
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index f1bb33a..01c838b 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2156,7 +2156,7 @@ void ASTStmtReader::VisitTypeTraitExpr(TypeTraitExpr *E) {
 
 void ASTStmtReader::VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
   VisitExpr(E);
-  E->ATT = (ArrayTypeTrait)Record.readInt();
+  E->ArrayTypeTraitExprBits.ATT = (ArrayTypeTrait)Record.readInt();
   E->Value = (unsigned int)Record.readInt();
   SourceRange Range = readSourceRange();
   E->Loc = Range.getBegin();
@@ -2167,8 +2167,8 @@ void ASTStmtReader::VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
 
 void ASTStmtReader::VisitExpressionTraitExpr(ExpressionTraitExpr *E) {
   VisitExpr(E);
-  E->ET = (ExpressionTrait)Record.readInt();
-  E->Value = (bool)Record.readInt();
+  E->ExpressionTraitExprBits.ET = (ExpressionTrait)Record.readInt();
+  E->ExpressionTraitExprBits.Value = (bool)Record.readInt();
   SourceRange Range = readSourceRange();
   E->QueriedExpression = Record.readSubExpr();
   E->Loc = Range.getBegin();
@@ -2209,14 +2209,14 @@ void ASTStmtReader::VisitSizeOfPackExpr(SizeOfPackExpr *E) {
 
 void ASTStmtReader::VisitPackIndexingExpr(PackIndexingExpr *E) {
   VisitExpr(E);
-  E->TransformedExpressions = Record.readInt();
-  E->FullySubstituted = Record.readInt();
+  E->PackIndexingExprBits.TransformedExpressions = Record.readInt();
+  E->PackIndexingExprBits.FullySubstituted = Record.readInt();
   E->EllipsisLoc = readSourceLocation();
   E->RSquareLoc = readSourceLocation();
   E->SubExprs[0] = Record.readStmt();
   E->SubExprs[1] = Record.readStmt();
   auto **Exprs = E->getTrailingObjects<Expr *>();
-  for (unsigned I = 0; I < E->TransformedExpressions; ++I)
+  for (unsigned I = 0; I < E->PackIndexingExprBits.TransformedExpressions; ++I)
     Exprs[I] = Record.readExpr();
 }
 
@@ -2275,7 +2275,7 @@ void ASTStmtReader::VisitCXXFoldExpr(CXXFoldExpr *E) {
   E->SubExprs[0] = Record.readSubExpr();
   E->SubExprs[1] = Record.readSubExpr();
   E->SubExprs[2] = Record.readSubExpr();
-  E->Opcode = (BinaryOperatorKind)Record.readInt();
+  E->CXXFoldExprBits.Opcode = (BinaryOperatorKind)Record.readInt();
 }
 
 void ASTStmtReader::VisitCXXParenListInitExpr(CXXParenListInitExpr *E) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index fc24b76..767e740 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2212,8 +2212,8 @@ void ASTStmtWriter::VisitSizeOfPackExpr(SizeOfPackExpr *E) {
 
 void ASTStmtWriter::VisitPackIndexingExpr(PackIndexingExpr *E) {
   VisitExpr(E);
-  Record.push_back(E->TransformedExpressions);
-  Record.push_back(E->FullySubstituted);
+  Record.push_back(E->PackIndexingExprBits.TransformedExpressions);
+  Record.push_back(E->PackIndexingExprBits.FullySubstituted);
   Record.AddSourceLocation(E->getEllipsisLoc());
   Record.AddSourceLocation(E->getRSquareLoc());
   Record.AddStmt(E->getPackIdExpression());
@@ -2278,7 +2278,7 @@ void ASTStmtWriter::VisitCXXFoldExpr(CXXFoldExpr *E) {
   Record.AddStmt(E->SubExprs[0]);
   Record.AddStmt(E->SubExprs[1]);
   Record.AddStmt(E->SubExprs[2]);
-  Record.push_back(E->Opcode);
+  Record.push_back(E->CXXFoldExprBits.Opcode);
   Code = serialization::EXPR_CXX_FOLD;
 }
 
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors-msvc.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors-msvc.cpp
index 435feec..3ed75b9 100644
--- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors-msvc.cpp
+++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors-msvc.cpp
@@ -1,7 +1,13 @@
-// RUN: not %clang_cc1 -triple aarch64-unknown-windows-msvc %s -emit-llvm \
-// RUN:   -o - 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-unknown-windows-msvc %s -emit-llvm \
+// RUN:   -o - | FileCheck %s
 
 template<typename T> struct S {};
 
-// CHECK: cannot mangle this built-in type: __SVInt8_t yet
+// CHECK: void @"?f1@@YAXU?$S@U__SVInt8_t@__clang@@@@@Z"
 void f1(S<__SVInt8_t>) {}
+// CHECK: void @"?f2@@YAXU?$S@U__SVInt32_t@__clang@@@@@Z"
+void f2(S<__SVInt32_t>) {}
+// CHECK: void @"?f3@@YAXU?$S@U__SVBool_t@__clang@@@@@Z"
+void f3(S<__SVBool_t>) {}
+// CHECK: void @"?f4@@YAXU?$S@U__clang_svfloat64x4_t@__clang@@@@@Z"
+void f4(S<__clang_svfloat64x4_t>) {}
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index f87e212e..ed2994d 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -374,6 +374,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemcmp
     libc.src.wchar.wmempcpy
     libc.src.wchar.wmemcpy
+    libc.src.wchar.wcsncpy
     libc.src.wchar.wcscat
     libc.src.wchar.wcsstr
     libc.src.wchar.wcsncat
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 087ace7..18b77a2 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -98,6 +98,14 @@ functions:
     standards:
       - stdc
     return_type: wchar_t *
+    arguments:
+      - type: __restrict wchar_t *
+      - type: const __restrict wchar_t *
+      - type: size_t
+  - name: wcsncpy
+    standards:
+      - stdc
+    return_type: wchar_t *
     arguments:
       - type: __restrict wchar_t *
       - type: const __restrict wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 95246ad..b131d2f 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -138,6 +138,18 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  wcsncpy
+  SRCS
+    wcsncpy.cpp
+  HDRS
+    wcsncpy.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.wchar_macros
+    libc.src.string.string_utils
+)
+
+add_entrypoint_object(
   wcscat
   SRCS
     wcscat.cpp
diff --git a/libc/src/wchar/wcsncpy.cpp b/libc/src/wchar/wcsncpy.cpp
new file mode 100644
index 0000000..e7ae9a4
--- /dev/null
+++ b/libc/src/wchar/wcsncpy.cpp
@@ -0,0 +1,33 @@
+//===-- Implementation of wcsncpy -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsncpy.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(wchar_t *, wcsncpy,
+                   (wchar_t *__restrict s1, const wchar_t *__restrict s2,
+                    size_t n)) {
+  size_t i = 0;
+  // Copy up until \0 is found.
+  for (; i < n && s2[i] != L'\0'; ++i)
+    s1[i] = s2[i];
+  // When s2 is shorter than n, append \0.
+  for (; i < n; ++i)
+    s1[i] = L'\0';
+  return s1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsncpy.h b/libc/src/wchar/wcsncpy.h
new file mode 100644
index 0000000..06c23f2
--- /dev/null
+++ b/libc/src/wchar/wcsncpy.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for wcsncpy ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSNCPY_H
+#define LLVM_LIBC_SRC_WCHAR_WCSNCPY_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+wchar_t *wcsncpy(wchar_t *__restrict s1, const wchar_t *__restrict s2,
+                 size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSNCPY_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 859d33a..89c3048 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -136,6 +136,16 @@ add_libc_test(
 )
 
 add_libc_test(
+  wcsncpy_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsncpy_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsncpy
+)
+
+add_libc_test(
   wcscat_test
   SUITE
     libc_wchar_unittests
diff --git a/libc/test/src/wchar/wcsncpy_test.cpp b/libc/test/src/wchar/wcsncpy_test.cpp
new file mode 100644
index 0000000..9b5ffbe2
--- /dev/null
+++ b/libc/test/src/wchar/wcsncpy_test.cpp
@@ -0,0 +1,66 @@
+//===-- Unittests for wcsncpy ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsncpy.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSNCpyTest, CopyZero) {
+  // Dest should remain unchanged.
+  wchar_t dest[3] = {L'a', L'b', L'\0'};
+  const wchar_t *src = L"x";
+  LIBC_NAMESPACE::wcsncpy(dest, src, 0);
+  ASSERT_TRUE(dest[0] == L'a');
+  ASSERT_TRUE(dest[1] == L'b');
+  ASSERT_TRUE(dest[2] == L'\0');
+}
+
+TEST(LlvmLibcWCSNCpyTest, CopyFullIntoEmpty) {
+  // Dest should be the exact same as src.
+  wchar_t dest[15];
+  const wchar_t *src = L"aaaaabbbbccccc";
+  LIBC_NAMESPACE::wcsncpy(dest, src, 15);
+  for (int i = 0; i < 15; i++)
+    ASSERT_TRUE(dest[i] == src[i]);
+}
+
+TEST(LlvmLibcWCSNCpyTest, CopyPartial) {
+  // First two characters of dest should be the first two characters of src.
+  wchar_t dest[] = {L'a', L'b', L'c', L'd', L'\0'};
+  const wchar_t *src = L"1234";
+  LIBC_NAMESPACE::wcsncpy(dest, src, 2);
+  ASSERT_TRUE(dest[0] == L'1');
+  ASSERT_TRUE(dest[1] == L'2');
+  ASSERT_TRUE(dest[2] == L'c');
+  ASSERT_TRUE(dest[3] == L'd');
+  ASSERT_TRUE(dest[4] == L'\0');
+}
+
+TEST(LlvmLibcWCSNCpyTest, CopyNullTerminator) {
+  // Null terminator should copy into dest.
+  wchar_t dest[] = {L'a', L'b', L'c', L'd', L'\0'};
+  const wchar_t src[] = {L'\0', L'y'};
+  LIBC_NAMESPACE::wcsncpy(dest, src, 1);
+  ASSERT_TRUE(dest[0] == L'\0');
+  ASSERT_TRUE(dest[1] == L'b');
+  ASSERT_TRUE(dest[2] == L'c');
+  ASSERT_TRUE(dest[3] == L'd');
+  ASSERT_TRUE(dest[4] == L'\0');
+}
+
+TEST(LlvmLibcWCSNCpyTest, CopyPastSrc) {
+  // Copying past src should fill with null terminator.
+  wchar_t dest[] = {L'a', L'b', L'c', L'd', L'\0'};
+  const wchar_t src[] = {L'x', L'\0'};
+  LIBC_NAMESPACE::wcsncpy(dest, src, 4);
+  ASSERT_TRUE(dest[0] == L'x');
+  ASSERT_TRUE(dest[1] == L'\0');
+  ASSERT_TRUE(dest[2] == L'\0');
+  ASSERT_TRUE(dest[3] == L'\0');
+  ASSERT_TRUE(dest[4] == L'\0');
+}
diff --git a/lldb/scripts/framework-header-fix.sh b/lldb/scripts/framework-header-fix.sh
index 3459dd9..345579c 100755
--- a/lldb/scripts/framework-header-fix.sh
+++ b/lldb/scripts/framework-header-fix.sh
@@ -7,11 +7,5 @@ for file in `find $1 -name "*.h"`
 do
   /usr/bin/sed -i.bak 's/\(#include\)[ ]*"lldb\/\(API\/\)\{0,1\}\(.*\)"/\1 <LLDB\/\3>/1' "$file"
   /usr/bin/sed -i.bak 's|<LLDB/Utility|<LLDB|' "$file"
-  LLDB_VERSION=`echo $2 | /usr/bin/sed -E 's/^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?$/\\1/g'`
-  LLDB_REVISION=`echo $2 | /usr/bin/sed -E 's/^([0-9]+).([0-9]+).([0-9]+)(.[0-9]+)?$/\\3/g'`
-  LLDB_VERSION_STRING=`echo $2`
-  /usr/bin/sed -i.bak "s|//#define LLDB_VERSION$|#define LLDB_VERSION $LLDB_VERSION |" "$file"
-  /usr/bin/sed -i.bak "s|//#define LLDB_REVISION|#define LLDB_REVISION $LLDB_REVISION |" "$file"
-  /usr/bin/sed -i.bak "s|//#define LLDB_VERSION_STRING|#define LLDB_VERSION_STRING \"$LLDB_VERSION_STRING\" |" "$file"
   rm -f "$file.bak"
 done
diff --git a/lldb/scripts/version-header-fix.py b/lldb/scripts/version-header-fix.py
new file mode 100755
index 0000000..fb26ee1
--- /dev/null
+++ b/lldb/scripts/version-header-fix.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""
+Usage: <path/to/input-header.h> <path/to/output-header.h> LLDB_MAJOR_VERSION LLDB_MINOR_VERSION LLDB_PATCH_VERSION
+
+This script uncomments and populates the versioning information in lldb-defines.h
+"""
+
+import argparse
+import os
+import re
+
+LLDB_VERSION_REGEX = re.compile(r"//\s*#define LLDB_VERSION\s*$", re.M)
+LLDB_REVISION_REGEX = re.compile(r"//\s*#define LLDB_REVISION\s*$", re.M)
+LLDB_VERSION_STRING_REGEX = re.compile(r"//\s*#define LLDB_VERSION_STRING\s*$", re.M)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input_path")
+    parser.add_argument("output_path")
+    parser.add_argument("lldb_version_major")
+    parser.add_argument("lldb_version_minor")
+    parser.add_argument("lldb_version_patch")
+    args = parser.parse_args()
+    input_path = str(args.input_path)
+    output_path = str(args.output_path)
+    lldb_version_major = args.lldb_version_major
+    lldb_version_minor = args.lldb_version_minor
+    lldb_version_patch = args.lldb_version_patch
+
+    with open(input_path, "r") as input_file:
+        lines = input_file.readlines()
+        file_buffer = "".join(lines)
+
+    with open(output_path, "w") as output_file:
+        # For the defines in lldb-defines.h that define the major, minor and version string
+        # uncomment each define and populate its value using the arguments passed in.
+        # e.g.
+        # //#define LLDB_VERSION -> #define LLDB_VERSION <LLDB_MAJOR_VERSION>
+        file_buffer = re.sub(
+            LLDB_VERSION_REGEX,
+            r"#define LLDB_VERSION " + lldb_version_major,
+            file_buffer,
+        )
+
+        file_buffer = re.sub(
+            LLDB_REVISION_REGEX,
+            r"#define LLDB_REVISION " + lldb_version_patch,
+            file_buffer,
+        )
+        file_buffer = re.sub(
+            LLDB_VERSION_STRING_REGEX,
+            r'#define LLDB_VERSION_STRING "{0}.{1}.{2}"'.format(
+                lldb_version_major, lldb_version_minor, lldb_version_patch
+            ),
+            file_buffer,
+        )
+        output_file.write(file_buffer)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index 3bc5696..4139f8a 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -290,6 +290,45 @@ else()
   endif()
 endif()
 
+# Stage all headers in the include directory in the build dir.
+file(GLOB public_headers ${LLDB_SOURCE_DIR}/include/lldb/API/*.h)
+set(lldb_header_staging_dir ${CMAKE_BINARY_DIR}/include/lldb)
+file(GLOB root_public_headers ${LLDB_SOURCE_DIR}/include/lldb/lldb-*.h)
+file(GLOB root_private_headers ${LLDB_SOURCE_DIR}/include/lldb/lldb-private*.h)
+list(REMOVE_ITEM root_public_headers ${root_private_headers})
+
+find_program(unifdef_EXECUTABLE unifdef)
+
+foreach(header
+    ${public_headers}
+    ${generated_public_headers}
+    ${root_public_headers})
+  get_filename_component(basename ${header} NAME)
+  set(staged_header ${lldb_header_staging_dir}/${basename})
+
+  if(unifdef_EXECUTABLE)
+    # unifdef returns 0 when the file is unchanged and 1 if something was changed.
+    # That means if we successfully remove SWIG code, the build system believes
+    # that the command has failed and stops. This is undesirable.
+    set(copy_command ${unifdef_EXECUTABLE} -USWIG -o ${staged_header} ${header} || (exit 0))
+  else()
+    set(copy_command ${CMAKE_COMMAND} -E copy ${header} ${staged_header})
+  endif()
+
+  add_custom_command(
+    DEPENDS ${header} OUTPUT ${staged_header}
+    COMMAND ${copy_command}
+    COMMENT "LLDB headers: stage LLDB headers in include directory")
+
+  list(APPEND lldb_staged_headers ${staged_header})
+endforeach()
+
+add_custom_command(TARGET liblldb POST_BUILD
+  COMMAND ${LLDB_SOURCE_DIR}/scripts/version-header-fix.py ${LLDB_SOURCE_DIR}/include/lldb/lldb-defines.h ${lldb_header_staging_dir}/lldb-defines.h ${LLDB_VERSION_MAJOR} ${LLDB_VERSION_MINOR} ${LLDB_VERSION_PATCH}
+)
+add_custom_target(liblldb-header-staging DEPENDS ${lldb_staged_headers})
+add_dependencies(liblldb liblldb-header-staging)
+
 if(LLDB_BUILD_FRAMEWORK)
   include(LLDBFramework)
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index fec8d29..9f77fbc 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -1978,10 +1978,10 @@ void ClangExpressionDeclMap::AddContextClassType(NameSearchContext &context,
         copied_clang_type.GetCompleteType()) {
       CompilerType void_clang_type =
           m_clang_ast_context->GetBasicType(eBasicTypeVoid);
-      CompilerType void_ptr_clang_type = void_clang_type.GetPointerType();
+      std::array<CompilerType, 1> args{void_clang_type.GetPointerType()};
 
       CompilerType method_type = m_clang_ast_context->CreateFunctionType(
-          void_clang_type, &void_ptr_clang_type, 1, false, 0);
+          void_clang_type, args, false, 0);
 
       const bool is_virtual = false;
       const bool is_static = false;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp
index 87c37e5..9d84af4 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp
@@ -150,8 +150,9 @@ lldb_private::formatters::StdlibCoroutineHandleSyntheticFrontEnd::Update() {
   lldb::ProcessSP process_sp = target_sp->GetProcessSP();
   auto ptr_size = process_sp->GetAddressByteSize();
   CompilerType void_type = ast_ctx->GetBasicType(lldb::eBasicTypeVoid);
+  std::array<CompilerType, 1> args{void_type};
   CompilerType coro_func_type = ast_ctx->CreateFunctionType(
-      /*result_type=*/void_type, /*args=*/&void_type, /*num_args=*/1,
+      /*result_type=*/void_type, args,
       /*is_variadic=*/false, /*qualifiers=*/0);
   CompilerType coro_func_ptr_type = coro_func_type.GetPointerType();
   m_resume_ptr_sp = CreateValueObjectFromAddress(
diff --git a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp
index c0b931f..f4d0323 100644
--- a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp
+++ b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp
@@ -489,8 +489,8 @@ SymbolFileCTF::CreateFunction(const CTFFunction &ctf_function) {
                                    llvm::inconvertibleErrorCode());
 
   CompilerType func_type = m_ast->CreateFunctionType(
-      ret_type->GetFullCompilerType(), arg_types.data(), arg_types.size(),
-      ctf_function.variadic, 0, clang::CallingConv::CC_C);
+      ret_type->GetFullCompilerType(), arg_types, ctf_function.variadic, 0,
+      clang::CallingConv::CC_C);
 
   Declaration decl;
   return MakeType(ctf_function.uid, ConstString(ctf_function.name), 0, nullptr,
@@ -814,8 +814,7 @@ size_t SymbolFileCTF::ParseFunctions(CompileUnit &cu) {
     // Create function type.
     CompilerType func_type = m_ast->CreateFunctionType(
         ret_type ? ret_type->GetFullCompilerType() : CompilerType(),
-        arg_types.data(), arg_types.size(), is_variadic, 0,
-        clang::CallingConv::CC_C);
+        arg_types, is_variadic, 0, clang::CallingConv::CC_C);
 
     lldb::user_id_t function_type_uid = m_types.size() + 1;
     TypeSP type_sp = MakeType(function_type_uid, symbol->GetName(), 0, nullptr,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 0c26c27..620501b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -1309,11 +1309,10 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
 
     // clang_type will get the function prototype clang type after this
     // call
-    CompilerType clang_type =
-        m_ast.CreateFunctionType(return_clang_type, function_param_types.data(),
-                                 function_param_types.size(), is_variadic,
-                                 GetCXXMethodCVQuals(die, object_parameter),
-                                 calling_convention, attrs.ref_qual);
+    CompilerType clang_type = m_ast.CreateFunctionType(
+        return_clang_type, function_param_types, is_variadic,
+        GetCXXMethodCVQuals(die, object_parameter), calling_convention,
+        attrs.ref_qual);
 
     if (attrs.name) {
       bool type_handled = false;
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index f7cde5d..702ec5e 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -1216,8 +1216,8 @@ clang::QualType PdbAstBuilder::CreateFunctionType(
     return {};
 
   CompilerType return_ct = ToCompilerType(return_type);
-  CompilerType func_sig_ast_type = m_clang.CreateFunctionType(
-      return_ct, arg_types.data(), arg_types.size(), is_variadic, 0, *cc);
+  CompilerType func_sig_ast_type =
+      m_clang.CreateFunctionType(return_ct, arg_types, is_variadic, 0, *cc);
 
   return clang::QualType::getFromOpaquePtr(
       func_sig_ast_type.GetOpaqueQualType());
diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp
index c6dd72e..0090d8f 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp
+++ b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp
@@ -653,9 +653,8 @@ lldb::TypeSP PDBASTParser::CreateLLDBTypeFromPDBType(const PDBSymbol &type) {
     if (func_sig->isVolatileType())
       type_quals |= clang::Qualifiers::Volatile;
     auto cc = TranslateCallingConvention(func_sig->getCallingConvention());
-    CompilerType func_sig_ast_type =
-        m_ast.CreateFunctionType(return_ast_type, arg_list.data(),
-                                 arg_list.size(), is_variadic, type_quals, cc);
+    CompilerType func_sig_ast_type = m_ast.CreateFunctionType(
+        return_ast_type, arg_list, is_variadic, type_quals, cc);
 
     AddSourceInfoToDecl(type, decl);
     return m_ast.GetSymbolFile()->MakeType(
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 68bb3dc..2930241 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -2181,25 +2181,22 @@ FunctionDecl *TypeSystemClang::CreateFunctionDeclaration(
 }
 
 CompilerType TypeSystemClang::CreateFunctionType(
-    const CompilerType &result_type, const CompilerType *args,
-    unsigned num_args, bool is_variadic, unsigned type_quals,
-    clang::CallingConv cc, clang::RefQualifierKind ref_qual) {
+    const CompilerType &result_type, llvm::ArrayRef<CompilerType> args,
+    bool is_variadic, unsigned type_quals, clang::CallingConv cc,
+    clang::RefQualifierKind ref_qual) {
   if (!result_type || !ClangUtil::IsClangType(result_type))
     return CompilerType(); // invalid return type
 
   std::vector<QualType> qual_type_args;
-  if (num_args > 0 && args == nullptr)
-    return CompilerType(); // invalid argument array passed in
-
   // Verify that all arguments are valid and the right type
-  for (unsigned i = 0; i < num_args; ++i) {
-    if (args[i]) {
+  for (const auto &arg : args) {
+    if (arg) {
       // Make sure we have a clang type in args[i] and not a type from another
       // language whose name might match
-      const bool is_clang_type = ClangUtil::IsClangType(args[i]);
+      const bool is_clang_type = ClangUtil::IsClangType(arg);
       lldbassert(is_clang_type);
       if (is_clang_type)
-        qual_type_args.push_back(ClangUtil::GetQualType(args[i]));
+        qual_type_args.push_back(ClangUtil::GetQualType(arg));
       else
         return CompilerType(); // invalid argument type (must be a clang type)
     } else
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index f3235da..63dee9d 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -480,9 +480,9 @@ public:
                             clang::StorageClass storage, bool is_inline);
 
   CompilerType
-  CreateFunctionType(const CompilerType &result_type, const CompilerType *args,
-                     unsigned num_args, bool is_variadic, unsigned type_quals,
-                     clang::CallingConv cc = clang::CC_C,
+  CreateFunctionType(const CompilerType &result_type,
+                     llvm::ArrayRef<CompilerType> args, bool is_variadic,
+                     unsigned type_quals, clang::CallingConv cc = clang::CC_C,
                      clang::RefQualifierKind ref_qual = clang::RQ_None);
 
   clang::ParmVarDecl *
diff --git a/lldb/test/Shell/Scripts/Inputs/lldb-defines.h b/lldb/test/Shell/Scripts/Inputs/lldb-defines.h
new file mode 100644
index 0000000..8d3090e
--- /dev/null
+++ b/lldb/test/Shell/Scripts/Inputs/lldb-defines.h
@@ -0,0 +1,7 @@
+// This is a truncated version of lldb-defines.h used to test the script
+// that fixes up its versioning info.
+
+// The script needs to uncomment these lines and populate the info for versioning.
+// #define LLDB_VERSION
+// #define LLDB_REVISION
+// #define LLDB_VERSION_STRING
diff --git a/lldb/test/Shell/Scripts/TestVersionFixScript.test b/lldb/test/Shell/Scripts/TestVersionFixScript.test
new file mode 100644
index 0000000..78cc987
--- /dev/null
+++ b/lldb/test/Shell/Scripts/TestVersionFixScript.test
@@ -0,0 +1,11 @@
+# Create a temp dir for output and run the version fix script on the truncated version of lldb-defines.h in the inputs dir.
+RUN: mkdir -p %t/Outputs
+RUN: %python %p/../../../scripts/version-header-fix.py %p/Inputs/lldb-defines.h %t/Outputs/lldb-defines.h 21 0 12
+
+# Check the output
+RUN: cat %t/Outputs/lldb-defines.h | FileCheck %s
+
+# The LLDB version defines must be uncommented and filled in with the values passed into the script.
+CHECK: {{^}}#define LLDB_VERSION 21
+CHECK: {{^}}#define LLDB_REVISION 12
+CHECK: {{^}}#define LLDB_VERSION_STRING "21.0.12"
diff --git a/lldb/tools/lldb-dap/Handler/NextRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/NextRequestHandler.cpp
index 3fa1676..2b48350 100644
--- a/lldb/tools/lldb-dap/Handler/NextRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/NextRequestHandler.cpp
@@ -8,6 +8,7 @@
 
 #include "DAP.h"
 #include "EventHelper.h"
+#include "LLDBUtils.h"
 #include "Protocol/ProtocolTypes.h"
 #include "RequestHandler.h"
 #include "llvm/Support/Error.h"
@@ -30,16 +31,21 @@ Error NextRequestHandler::Run(const NextArguments &args) const {
   if (!thread.IsValid())
     return make_error<DAPError>("invalid thread");
 
+  if (!SBDebugger::StateIsStoppedState(dap.target.GetProcess().GetState()))
+    return make_error<NotStoppedError>();
+
   // Remember the thread ID that caused the resume so we can set the
   // "threadCausedFocus" boolean value in the "stopped" events.
   dap.focus_tid = thread.GetThreadID();
+  lldb::SBError error;
   if (args.granularity == eSteppingGranularityInstruction) {
-    thread.StepInstruction(/*step_over=*/true);
+    thread.StepInstruction(/*step_over=*/true, error);
   } else {
-    thread.StepOver(args.singleThread ? eOnlyThisThread : eOnlyDuringStepping);
+    thread.StepOver(args.singleThread ? eOnlyThisThread : eOnlyDuringStepping,
+                    error);
   }
 
-  return Error::success();
+  return ToError(error);
 }
 
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/StepInRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/StepInRequestHandler.cpp
index 15f242a..6742c79 100644
--- a/lldb/tools/lldb-dap/Handler/StepInRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/StepInRequestHandler.cpp
@@ -8,6 +8,7 @@
 
 #include "DAP.h"
 #include "EventHelper.h"
+#include "LLDBUtils.h"
 #include "Protocol/ProtocolRequests.h"
 #include "Protocol/ProtocolTypes.h"
 #include "RequestHandler.h"
@@ -39,9 +40,13 @@ Error StepInRequestHandler::Run(const StepInArguments &args) const {
   // "threadCausedFocus" boolean value in the "stopped" events.
   dap.focus_tid = thread.GetThreadID();
 
+  if (!SBDebugger::StateIsStoppedState(dap.target.GetProcess().GetState()))
+    return make_error<NotStoppedError>();
+
+  lldb::SBError error;
   if (args.granularity == eSteppingGranularityInstruction) {
-    thread.StepInstruction(/*step_over=*/false);
-    return Error::success();
+    thread.StepInstruction(/*step_over=*/false, error);
+    return ToError(error);
   }
 
   std::string step_in_target;
@@ -50,8 +55,9 @@ Error StepInRequestHandler::Run(const StepInArguments &args) const {
     step_in_target = it->second;
 
   RunMode run_mode = args.singleThread ? eOnlyThisThread : eOnlyDuringStepping;
-  thread.StepInto(step_in_target.c_str(), run_mode);
-  return Error::success();
+  thread.StepInto(step_in_target.c_str(), LLDB_INVALID_LINE_NUMBER, error,
+                  run_mode);
+  return ToError(error);
 }
 
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/StepOutRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/StepOutRequestHandler.cpp
index 6b98582..e896e03 100644
--- a/lldb/tools/lldb-dap/Handler/StepOutRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/StepOutRequestHandler.cpp
@@ -8,6 +8,7 @@
 
 #include "DAP.h"
 #include "EventHelper.h"
+#include "LLDBUtils.h"
 #include "Protocol/ProtocolRequests.h"
 #include "RequestHandler.h"
 #include "llvm/Support/Error.h"
@@ -32,12 +33,17 @@ Error StepOutRequestHandler::Run(const StepOutArguments &arguments) const {
   if (!thread.IsValid())
     return make_error<DAPError>("invalid thread");
 
+  if (!lldb::SBDebugger::StateIsStoppedState(
+          dap.target.GetProcess().GetState()))
+    return make_error<NotStoppedError>();
+
   // Remember the thread ID that caused the resume so we can set the
   // "threadCausedFocus" boolean value in the "stopped" events.
   dap.focus_tid = thread.GetThreadID();
 
-  thread.StepOut();
+  lldb::SBError error;
+  thread.StepOut(error);
 
-  return Error::success();
+  return ToError(error);
 }
 
 } // namespace lldb_dap
diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp
index a9b0c87..d555d27 100644
--- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp
+++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp
@@ -866,8 +866,7 @@ TEST_F(TestTypeSystemClang, TestFunctionTemplateConstruction) {
   clang::TranslationUnitDecl *TU = m_ast->GetTranslationUnitDecl();
 
   // Prepare the declarations/types we need for the template.
-  CompilerType clang_type =
-      m_ast->CreateFunctionType(int_type, nullptr, 0U, false, 0U);
+  CompilerType clang_type = m_ast->CreateFunctionType(int_type, {}, false, 0U);
   FunctionDecl *func = m_ast->CreateFunctionDeclaration(
       TU, OptionalClangModuleID(), "foo", clang_type, StorageClass::SC_None,
       false);
@@ -895,8 +894,7 @@ TEST_F(TestTypeSystemClang, TestFunctionTemplateInRecordConstruction) {
   clang::TagDecl *record = ClangUtil::GetAsTagDecl(record_type);
 
   // Prepare the declarations/types we need for the template.
-  CompilerType clang_type =
-      m_ast->CreateFunctionType(int_type, nullptr, 0U, false, 0U);
+  CompilerType clang_type = m_ast->CreateFunctionType(int_type, {}, false, 0U);
   // We create the FunctionDecl for the template in the TU DeclContext because:
   // 1. FunctionDecls can't be in a Record (only CXXMethodDecls can).
   // 2. It is mirroring the behavior of DWARFASTParserClang::ParseSubroutine.
@@ -930,10 +928,9 @@ TEST_F(TestTypeSystemClang, TestDeletingImplicitCopyCstrDueToMoveCStr) {
 
   // Create a move constructor that will delete the implicit copy constructor.
   CompilerType return_type = m_ast->GetBasicType(lldb::eBasicTypeVoid);
-  CompilerType param_type = t.GetRValueReferenceType();
-  CompilerType function_type =
-      m_ast->CreateFunctionType(return_type, &param_type, /*num_params*/ 1,
-                                /*variadic=*/false, /*quals*/ 0U);
+  std::array<CompilerType, 1> args{t.GetRValueReferenceType()};
+  CompilerType function_type = m_ast->CreateFunctionType(
+      return_type, args, /*variadic=*/false, /*quals*/ 0U);
   bool is_virtual = false;
   bool is_static = false;
   bool is_inline = false;
@@ -974,10 +971,9 @@ TEST_F(TestTypeSystemClang, TestNotDeletingUserCopyCstrDueToMoveCStr) {
   bool is_artificial = false;
   // Create a move constructor.
   {
-    CompilerType param_type = t.GetRValueReferenceType();
-    CompilerType function_type =
-        m_ast->CreateFunctionType(return_type, &param_type, /*num_params*/ 1,
-                                  /*variadic=*/false, /*quals*/ 0U);
+    std::array<CompilerType, 1> args{t.GetRValueReferenceType()};
+    CompilerType function_type = m_ast->CreateFunctionType(
+        return_type, args, /*variadic=*/false, /*quals*/ 0U);
     m_ast->AddMethodToCXXRecordType(
         t.GetOpaqueQualType(), class_name, nullptr, function_type,
         lldb::AccessType::eAccessPublic, is_virtual, is_static, is_inline,
@@ -985,9 +981,10 @@ TEST_F(TestTypeSystemClang, TestNotDeletingUserCopyCstrDueToMoveCStr) {
   }
   // Create a copy constructor.
   {
-    CompilerType param_type = t.GetLValueReferenceType().AddConstModifier();
+    std::array<CompilerType, 1> args{
+        t.GetLValueReferenceType().AddConstModifier()};
     CompilerType function_type =
-        m_ast->CreateFunctionType(return_type, &param_type, /*num_params*/ 1,
+        m_ast->CreateFunctionType(return_type, args,
                                   /*variadic=*/false, /*quals*/ 0U);
     m_ast->AddMethodToCXXRecordType(
         t.GetOpaqueQualType(), class_name, nullptr, function_type,
@@ -1012,10 +1009,9 @@ TEST_F(TestTypeSystemClang, AddMethodToObjCObjectType) {
 
   // Add a method to the interface.
   std::vector<CompilerType> args;
-  CompilerType func_type =
-      m_ast->CreateFunctionType(m_ast->GetBasicType(lldb::eBasicTypeInt),
-                                args.data(), args.size(), /*variadic*/ false,
-                                /*quals*/ 0, clang::CallingConv::CC_C);
+  CompilerType func_type = m_ast->CreateFunctionType(
+      m_ast->GetBasicType(lldb::eBasicTypeInt), args, /*variadic*/ false,
+      /*quals*/ 0, clang::CallingConv::CC_C);
   bool variadic = false;
   bool artificial = false;
   bool objc_direct = false;
@@ -1098,9 +1094,9 @@ TEST_F(TestTypeSystemClang, AddMethodToCXXRecordType_ParmVarDecls) {
   llvm::SmallVector<CompilerType> param_types{
       m_ast->GetBasicType(lldb::eBasicTypeInt),
       m_ast->GetBasicType(lldb::eBasicTypeShort)};
-  CompilerType function_type = m_ast->CreateFunctionType(
-      return_type, param_types.data(), /*num_params*/ param_types.size(),
-      /*variadic=*/false, /*quals*/ 0U);
+  CompilerType function_type =
+      m_ast->CreateFunctionType(return_type, param_types,
+                                /*variadic=*/false, /*quals*/ 0U);
   m_ast->AddMethodToCXXRecordType(
       t.GetOpaqueQualType(), "myFunc", nullptr, function_type,
       lldb::AccessType::eAccessPublic, is_virtual, is_static, is_inline,
diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap
index b378023..9d7dbb8 100644
--- a/llvm/include/module.modulemap
+++ b/llvm/include/module.modulemap
@@ -178,9 +178,10 @@ module LLVM_DebugInfo_CodeView {
   module * { export * }
 
   // These are intended for (repeated) textual inclusion.
+  textual header "llvm/DebugInfo/CodeView/CodeViewLanguages.def"
   textual header "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
-  textual header "llvm/DebugInfo/CodeView/CodeViewTypes.def"
   textual header "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
+  textual header "llvm/DebugInfo/CodeView/CodeViewTypes.def"
 }
 
 module LLVM_DWARFLinker {
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index db03730..d8451b8 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -278,4 +278,17 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
     }
     setLibcallName(RTLIB::MULO_I128, nullptr);
   }
+
+  if (TT.isSystemZ() && TT.isOSzOS()) {
+    struct RTLibCallMapping {
+      RTLIB::Libcall Code;
+      const char *Name;
+    };
+    static RTLibCallMapping RTLibCallCommon[] = {
+#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
+#include "ZOSLibcallNames.def"
+    };
+    for (auto &E : RTLibCallCommon)
+      setLibcallName(E.Code, E.Name);
+  }
 }
diff --git a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def b/llvm/lib/IR/ZOSLibcallNames.def
index 12a0152..12a0152 100644
--- a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
+++ b/llvm/lib/IR/ZOSLibcallNames.def
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e2c12bb..9f51cae 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1336,7 +1336,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::MUL, MVT::v1i64, Custom);
 
     // Saturates
-    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
+    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
                     MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::SADDSAT, VT, Legal);
       setOperationAction(ISD::UADDSAT, VT, Legal);
@@ -1984,14 +1984,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(Op, MVT::f16, Promote);
 
   if (Subtarget->isWindowsArm64EC()) {
-    // FIXME: are there intrinsics we need to exclude from this?
-    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
-      auto code = static_cast<RTLIB::Libcall>(i);
-      auto libcallName = getLibcallName(code);
-      if ((libcallName != nullptr) && (libcallName[0] != '#')) {
-        setLibcallName(code, Saver.save(Twine("#") + libcallName).data());
-      }
-    }
+    // FIXME: are there calls we need to exclude from this?
+#define HANDLE_LIBCALL(code, name)                                             \
+  {                                                                            \
+    const char *libcallName = getLibcallName(RTLIB::code);                     \
+    if (libcallName && libcallName[0] != '#')                                  \
+      setLibcallName(RTLIB::code, "#" #name);                                  \
+  }
+#include "llvm/IR/RuntimeLibcalls.def"
+#undef HANDLE_LIBCALL
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 450e2ef..b217448 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -540,9 +540,6 @@ private:
   /// make the right decision when generating code for different targets.
   const AArch64Subtarget *Subtarget;
 
-  llvm::BumpPtrAllocator BumpAlloc;
-  llvm::StringSaver Saver{BumpAlloc};
-
   bool isExtFreeImpl(const Instruction *Ext) const override;
 
   void addTypeForNEON(MVT VT);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4f24409..4796c27 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7763,9 +7763,9 @@ multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
 }
 
 multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
-                               SDPatternOperator OpNode> {
+                               SDPatternOperator OpNode, SDPatternOperator SatOp> {
   def v1i64  : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
-    [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
+    [(set (v1i64 FPR64:$Rd), (SatOp (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
   def v1i32  : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
   def v1i16  : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
   def v1i8   : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a629bd5..a229b71 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1281,8 +1281,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                       int64_t &CmpValue) const {
   // The first operand can be a frame index where we'd normally expect a
   // register.
+  // FIXME: Pass subregisters out of analyzeCompare
   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
-  if (!MI.getOperand(1).isReg())
+  if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
     return false;
 
   switch (MI.getOpcode()) {
@@ -1292,6 +1293,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   case AArch64::PTEST_PP_ANY:
     SrcReg = MI.getOperand(0).getReg();
     SrcReg2 = MI.getOperand(1).getReg();
+    if (MI.getOperand(2).getSubReg())
+      return false;
+
     // Not sure about the mask and value for now...
     CmpMask = ~0;
     CmpValue = 0;
@@ -1311,6 +1315,11 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
     // Replace SUBSWrr with SUBWrr if NZCV is not used.
SrcReg = MI.getOperand(1).getReg(); SrcReg2 = MI.getOperand(2).getReg(); + + // FIXME: Pass subregisters out of analyzeCompare + if (MI.getOperand(2).getSubReg()) + return false; + CmpMask = ~0; CmpValue = 0; return true; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 639d7e4..7278318 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6339,19 +6339,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>; defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>; defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>; defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; let Predicates = [HasRDM] in { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index be4876d0..d156851 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -692,20 +692,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, const RTLIB::Libcall Op; const char *const Name; const CallingConv::ID CC; - const ISD::CondCode Cond; } MemOpsLibraryCalls[] = { - // Memory operations - // RTABI chapter 4.3.4 - { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, - { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, - { RTLIB::MEMSET, "__aeabi_memset", 
CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + // Memory operations + // RTABI chapter 4.3.4 + {RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS}, + {RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS}, + {RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS}, }; for (const auto &LC : MemOpsLibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); - if (LC.Cond != ISD::SETCC_INVALID) - setCmpLibcallCC(LC.Op, LC.Cond); } } } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 74686d9..f74ca2a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -434,9 +434,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ABS, MVT::i32, Custom); } - if (Subtarget.useCCMovInsn()) - setOperationAction(ISD::SELECT, XLenVT, Legal); - else if (!Subtarget.hasVendorXTHeadCondMov()) + if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov()) setOperationAction(ISD::SELECT, XLenVT, Custom); if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b86a9b..f062467 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -828,19 +828,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Default to having -disable-strictnode-mutation on IsStrictFPEnabled = true; - - if (Subtarget.isTargetzOS()) { - struct RTLibCallMapping { - RTLIB::Libcall Code; - const char *Name; - }; - static RTLibCallMapping RTLibCallCommon[] = { -#define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, -#include "ZOSLibcallNames.def" - }; - for (auto &E : RTLibCallCommon) - setLibcallName(E.Code, E.Name); - } } bool SystemZTargetLowering::useSoftFloat() const { diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index e9260b6..f62361d 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -328,15 +328,33 @@ static bool tryToRecognizePopCount(Instruction &I) { m_SpecificInt(Mask33))))) { Value *Root, *SubOp1; // Matching "i - ((i >> 1) & 0x55555555...)". + const APInt *AndMask; if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) && match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)), - m_SpecificInt(Mask55)))) { - LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); - IRBuilder<> Builder(&I); - I.replaceAllUsesWith( - Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root})); - ++NumPopCountRecognized; - return true; + m_APInt(AndMask)))) { + auto CheckAndMask = [&]() { + if (*AndMask == Mask55) + return true; + + // Exact match failed, see if any bits are known to be 0 where we + // expect a 1 in the mask. 
+ if (!AndMask->isSubsetOf(Mask55)) + return false; + + APInt NeededMask = Mask55 & ~*AndMask; + return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0), + NeededMask, + SimplifyQuery(I.getDataLayout())); + }; + + if (CheckAndMask()) { + LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); + IRBuilder<> Builder(&I); + I.replaceAllUsesWith( + Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root})); + ++NumPopCountRecognized; + return true; + } } } } diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index d420aa7a..16a3d20 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -392,6 +392,15 @@ struct PromoteMem2Reg { /// number. SmallVector<unsigned> BBNumPreds; + /// The state of incoming values for the current DFS step. + RenamePassData::ValVector IncomingVals; + + /// The state of incoming locations for the current DFS step. + RenamePassData::LocationVector IncomingLocs; + + // DFS work stack. + SmallVector<RenamePassData, 8> Worklist; + /// Whether the function has the no-signed-zeros-fp-math attribute set. bool NoSignedZeros = false; @@ -423,10 +432,7 @@ private: void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, const SmallPtrSetImpl<BasicBlock *> &DefBlocks, SmallPtrSetImpl<BasicBlock *> &LiveInBlocks); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector IncVals, - RenamePassData::LocationVector IncLocs, - std::vector<RenamePassData> &Worklist); + void RenamePass(BasicBlock *BB, BasicBlock *Pred); bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); /// Delete dbg.assigns that have been demoted to dbg.values. @@ -438,6 +444,20 @@ private: DVR->eraseFromParent(); DVRAssignsToDelete.clear(); } + + void pushToWorklist(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector IncVals, + RenamePassData::LocationVector IncLocs) { + Worklist.emplace_back(BB, Pred, std::move(IncVals), std::move(IncLocs)); + } + + RenamePassData popFromWorklist() { + RenamePassData R = std::move(Worklist.back()); + Worklist.pop_back(); + IncomingVals = std::move(R.Values); + IncomingLocs = std::move(R.Locations); + return R; + } }; } // end anonymous namespace @@ -849,29 +869,26 @@ void PromoteMem2Reg::run() { // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. - RenamePassData::ValVector Values(Allocas.size()); + IncomingVals.resize(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) - Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); + IncomingVals[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // When handling debug info, treat all incoming values as if they have unknown // locations until proven otherwise. - RenamePassData::LocationVector Locations(Allocas.size()); + IncomingLocs.resize(Allocas.size()); // The renamer uses the Visited set to avoid infinite loops. 
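The PromoteMem2Reg change above hoists the per-call DFS state into members with explicit push/pop helpers: each worklist entry snapshots the incoming values along one CFG edge, and popping restores that snapshot before the block is renamed. A rough stand-alone sketch of that shape — pushToWorklist/popFromWorklist mirror the patch, while plain ints stand in for LLVM's blocks and values:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

struct Frame {
  int BB;                    // stand-in for BasicBlock*
  std::vector<int> Incoming; // stand-in for RenamePassData::ValVector
};

struct Renamer {
  std::vector<int> IncomingVals; // state for the current DFS step
  std::vector<Frame> Worklist;
  std::vector<bool> Visited;

  void pushToWorklist(int BB, std::vector<int> Vals) {
    Worklist.push_back({BB, std::move(Vals)});
  }
  int popFromWorklist() {
    Frame F = std::move(Worklist.back());
    Worklist.pop_back();
    IncomingVals = std::move(F.Incoming); // restore the edge's snapshot
    return F.BB;
  }
  void run(const std::vector<std::vector<int>> &Succs) {
    Visited.assign(Succs.size(), false);
    pushToWorklist(0, IncomingVals);
    do {
      int BB = popFromWorklist();
      if (Visited[BB])
        continue;
      Visited[BB] = true;
      // A real RenamePass would rewrite loads/stores here and update
      // IncomingVals before propagating it along each outgoing edge.
      for (int S : Succs[BB])
        pushToWorklist(S, IncomingVals);
    } while (!Worklist.empty());
  }
};

int main() {
  Renamer R;
  R.IncomingVals = {0, 0};  // "undef" incoming values for two allocas
  R.run({{1, 2}, {2}, {}}); // tiny CFG: 0 -> {1,2}, 1 -> {2}
  std::printf("blocks seen: %d\n",
              (int)std::count(R.Visited.begin(), R.Visited.end(), true));
  return 0;
}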
Visited.resize(F.getMaxBlockNumber()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary - std::vector<RenamePassData> RenamePassWorkList; - RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), - std::move(Locations)); + pushToWorklist(&F.front(), nullptr, std::move(IncomingVals), + std::move(IncomingLocs)); do { - RenamePassData RPD = std::move(RenamePassWorkList.back()); - RenamePassWorkList.pop_back(); + RenamePassData RPD = popFromWorklist(); // RenamePass may add new worklist entries. - RenamePass(RPD.BB, RPD.Pred, std::move(RPD.Values), - std::move(RPD.Locations), RenamePassWorkList); - } while (!RenamePassWorkList.empty()); + RenamePass(RPD.BB, RPD.Pred); + } while (!Worklist.empty()); // Remove the allocas themselves from the function. for (Instruction *A : Allocas) { @@ -1096,10 +1113,7 @@ static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL, /// /// IncomingVals indicates what value each Alloca contains on exit from the /// predecessor block Pred. -void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector IncomingVals, - RenamePassData::LocationVector IncomingLocs, - std::vector<RenamePassData> &Worklist) { +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) { // If we are inserting any phi nodes into this BB, they will already be in the // block. if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { @@ -1226,8 +1240,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, IncomingVals = Worklist.back().Values; IncomingLocs = Worklist.back().Locations; } - Worklist.emplace_back(S, BB, std::move(IncomingVals), - std::move(IncomingLocs)); + pushToWorklist(S, BB, std::move(IncomingVals), std::move(IncomingLocs)); } } diff --git a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll index fa515fe..ff1feda 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll @@ -45,13 +45,7 @@ define <1 x i64> @sqadd1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x8, x9, x8, vs -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sqadd d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B @@ -104,11 +98,7 @@ define <1 x i64> @uqadd1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: csinv x8, x8, xzr, lo -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: uqadd d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B diff --git a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll index ffcb7d6..b8168eb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll @@ -45,13 +45,7 @@ define <1 x i64> @sqsub1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: subs x8, x9, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x8, x9, x8, vs -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sqsub 
d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B @@ -104,11 +98,7 @@ define <1 x i64> @uqsub1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: subs x8, x9, x8 -; CHECK-NEXT: csel x8, xzr, x8, lo -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: uqsub d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B diff --git a/llvm/test/CodeGen/AArch64/peephole-opt-analyzeCompare-subreg-use.mir b/llvm/test/CodeGen/AArch64/peephole-opt-analyzeCompare-subreg-use.mir new file mode 100644 index 0000000..c9d6bc7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/peephole-opt-analyzeCompare-subreg-use.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64-- -run-pass=peephole-opt -o - %s | FileCheck %s + +# Make sure that analyzeCompare doesn't produce illegal folds due to +# ignoring the subregister index on the use operands. + +--- +name: analyze_compare_subreg_use_lhs +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: analyze_compare_subreg_use_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]].sub_32, [[COPY2]], implicit-def dead $nzcv + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], killed [[SUBSWrr]], %subreg.sub_32 + ; CHECK-NEXT: [[RORVXr:%[0-9]+]]:gpr64 = RORVXr [[COPY1]], killed [[INSERT_SUBREG]] + ; CHECK-NEXT: $x0 = COPY [[RORVXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x1 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $wzr + %3:gpr32 = SUBSWrr %0.sub_32, %2, implicit-def dead $nzcv + %4:gpr64all = IMPLICIT_DEF + %5:gpr64 = INSERT_SUBREG %4, killed %3, %subreg.sub_32 + %6:gpr64 = RORVXr %1, killed %5 + $x0 = COPY %6 + RET_ReallyLR implicit $x0 + +... + +--- +name: analyze_compare_subreg_use_rhs +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: analyze_compare_subreg_use_rhs + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY2]], [[COPY]].sub_32, implicit-def dead $nzcv + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], killed [[SUBSWrr]], %subreg.sub_32 + ; CHECK-NEXT: [[RORVXr:%[0-9]+]]:gpr64 = RORVXr [[COPY1]], killed [[INSERT_SUBREG]] + ; CHECK-NEXT: $x0 = COPY [[RORVXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x1 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $wzr + %3:gpr32 = SUBSWrr %2, %0.sub_32, implicit-def dead $nzcv + %4:gpr64all = IMPLICIT_DEF + %5:gpr64 = INSERT_SUBREG %4, killed %3, %subreg.sub_32 + %6:gpr64 = RORVXr %1, killed %5 + $x0 = COPY %6 + RET_ReallyLR implicit $x0 + +... 
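The MIR test above locks in the analyzeCompare fix from earlier in this patch: an operand that reads a subregister (SUBSWrr %0.sub_32, ...) must not be reported as a plain full-register compare source, or the peephole pass can fold a comparison of the wrong width. A simplified sketch of the guard with stand-in types, not LLVM's MachineOperand API:

#include <cassert>

struct Operand {
  bool IsReg = false;
  unsigned Reg = 0;
  unsigned SubReg = 0; // nonzero: only a slice of Reg is read
};

// Returns false (no fold) when a use operand carries a subregister index,
// since reporting just (SrcReg, SrcReg2) would drop the width information.
// FIXME upstream: pass subregisters out of analyzeCompare instead.
bool analyzeCompare(const Operand &LHS, const Operand &RHS, unsigned &SrcReg,
                    unsigned &SrcReg2) {
  if (!LHS.IsReg || LHS.SubReg || RHS.SubReg)
    return false;
  SrcReg = LHS.Reg;
  SrcReg2 = RHS.Reg;
  return true;
}

int main() {
  unsigned A, B;
  assert(analyzeCompare({true, 1, 0}, {true, 2, 0}, A, B));  // plain regs
  assert(!analyzeCompare({true, 1, 3}, {true, 2, 0}, A, B)); // %1.sub_32
  return 0;
}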
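The AArch64 test updates that follow show single sqadd/uqadd/sqsub/uqsub instructions replacing the old adds/csel expansions — the payoff of marking the saturating ISD nodes Legal for MVT::v1i64 and routing the saddsat/usubsat patterns through SIMDThreeScalarBHSD earlier in the patch. For reference, the scalar saturating-add semantics in plain C++; an illustration, not LLVM code, and it relies on the GCC/Clang __builtin_add_overflow intrinsic:

#include <cassert>
#include <cstdint>
#include <limits>

int64_t sqadd(int64_t a, int64_t b) {
  int64_t r;
  if (__builtin_add_overflow(a, b, &r))
    // Signed overflow only happens when a and b share a sign, so saturate
    // toward that sign (this is what the asr/eor/csel sequence computed).
    return a < 0 ? std::numeric_limits<int64_t>::min()
                 : std::numeric_limits<int64_t>::max();
  return r;
}

uint64_t uqadd(uint64_t a, uint64_t b) {
  uint64_t r;
  return __builtin_add_overflow(a, b, &r) ? ~0ULL : r; // clamp to all-ones
}

int main() {
  assert(sqadd(std::numeric_limits<int64_t>::max(), 1) ==
         std::numeric_limits<int64_t>::max());
  assert(uqadd(~0ULL, 1) == ~0ULL);
  return 0;
}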
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 5f0d4c7..1c4a504 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -447,13 +447,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: adds x8, x9, x8 -; CHECK-SD-NEXT: asr x9, x8, #63 -; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-SD-NEXT: csel x8, x9, x8, vs -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: sqadd d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index ed79d01..3af8587 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -449,13 +449,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: subs x8, x9, x8 -; CHECK-SD-NEXT: asr x9, x8, #63 -; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-SD-NEXT: csel x8, x9, x8, vs -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: sqsub d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index dcfb517..3cfb24a 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -439,11 +439,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: adds x8, x9, x8 -; CHECK-SD-NEXT: csinv x8, x8, xzr, lo -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: uqadd d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 0049aba..a71cf95 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -436,11 +436,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: subs x8, x9, x8 -; CHECK-SD-NEXT: csel x8, xzr, x8, lo -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: uqsub d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll index b692a80..9c88445 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll @@ -1302,3 +1302,477 @@ entry: store <2 x double> %shuffle.i5, ptr %out, align 8 ret void } + +define <2 x i64> @unzip2a_dual_v2i64(<2 
x i64> %a, <2 x i64> %b) { +; V-LABEL: unzip2a_dual_v2i64: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; V-NEXT: vslideup.vi v8, v9, 1 +; V-NEXT: ret +; +; ZVE32F-LABEL: unzip2a_dual_v2i64: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: mv a1, a2 +; ZVE32F-NEXT: ret +; +; ZIP-LABEL: unzip2a_dual_v2i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZIP-NEXT: vslideup.vi v8, v9, 1 +; ZIP-NEXT: ret +entry: + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %c +} + +define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) { +; V-LABEL: unzip2a_dual_v4i64: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; V-NEXT: vmv.v.i v0, 8 +; V-NEXT: vslideup.vi v10, v9, 2 +; V-NEXT: vslideup.vi v10, v9, 1, v0.t +; V-NEXT: vmv.v.i v0, 2 +; V-NEXT: vslidedown.vi v8, v8, 1, v0.t +; V-NEXT: vmv.v.i v0, 12 +; V-NEXT: vmerge.vvm v8, v8, v10, v0 +; V-NEXT: ret +; +; ZVE32F-LABEL: unzip2a_dual_v4i64: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: ld a3, 0(a2) +; ZVE32F-NEXT: ld a2, 16(a2) +; ZVE32F-NEXT: ld a4, 0(a1) +; ZVE32F-NEXT: ld a1, 16(a1) +; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu +; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: srli a5, a2, 32 +; ZVE32F-NEXT: srli a6, a3, 32 +; ZVE32F-NEXT: srli a7, a1, 32 +; ZVE32F-NEXT: srli t0, a4, 32 +; ZVE32F-NEXT: vmv.v.x v8, a4 +; ZVE32F-NEXT: vmv.v.x v9, a3 +; ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a6 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a2 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t +; ZVE32F-NEXT: vse32.v v9, (a0) +; ZVE32F-NEXT: ret +; +; ZIP-LABEL: unzip2a_dual_v4i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; ZIP-NEXT: vmv.v.i v0, 8 +; ZIP-NEXT: vslideup.vi v10, v9, 2 +; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t +; ZIP-NEXT: vmv.v.i v0, 12 +; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9 +; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i64> %c +} + +define <16 x i64> @unzip2a_dual_v16i64(<16 x i64> %a, <16 x i64> %b) { +; V-LABEL: unzip2a_dual_v16i64: +; V: # %bb.0: # %entry +; V-NEXT: lui a0, 5 +; V-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; V-NEXT: vid.v v16 +; V-NEXT: addi a0, a0, 1365 +; V-NEXT: vmv.s.x v20, a0 +; V-NEXT: li a0, -256 +; V-NEXT: vadd.vv v21, v16, v16 +; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; V-NEXT: vcompress.vm v16, v8, v20 +; V-NEXT: vmv.s.x v0, a0 +; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; V-NEXT: vadd.vi v8, v21, -16 +; V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; V-NEXT: vrgatherei16.vv v16, v12, v8, v0.t +; V-NEXT: vmv.v.v v8, v16 +; V-NEXT: ret +; +; ZVE32F-LABEL: unzip2a_dual_v16i64: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: addi sp, sp, -256 +; ZVE32F-NEXT: .cfi_def_cfa_offset 256 +; ZVE32F-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; ZVE32F-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; ZVE32F-NEXT: sd s2, 232(sp) # 8-byte Folded Spill +; ZVE32F-NEXT: sd s3, 224(sp) # 8-byte Folded Spill +; ZVE32F-NEXT: sd s4, 216(sp) # 8-byte Folded Spill +; ZVE32F-NEXT: .cfi_offset ra, -8 +; ZVE32F-NEXT: .cfi_offset s0, -16 +; ZVE32F-NEXT: .cfi_offset s2, -24 +; ZVE32F-NEXT: .cfi_offset s3, -32 +; ZVE32F-NEXT: .cfi_offset s4, -40 +; ZVE32F-NEXT: addi s0, sp, 256 +; ZVE32F-NEXT: .cfi_def_cfa s0, 0 +; 
ZVE32F-NEXT: andi sp, sp, -128 +; ZVE32F-NEXT: ld t5, 0(a1) +; ZVE32F-NEXT: ld t2, 16(a1) +; ZVE32F-NEXT: ld a4, 32(a1) +; ZVE32F-NEXT: ld a3, 48(a1) +; ZVE32F-NEXT: ld a6, 64(a1) +; ZVE32F-NEXT: ld a5, 80(a1) +; ZVE32F-NEXT: ld a7, 96(a1) +; ZVE32F-NEXT: ld a1, 112(a1) +; ZVE32F-NEXT: ld t1, 0(a2) +; ZVE32F-NEXT: ld t0, 16(a2) +; ZVE32F-NEXT: ld t4, 32(a2) +; ZVE32F-NEXT: ld t3, 48(a2) +; ZVE32F-NEXT: ld t6, 64(a2) +; ZVE32F-NEXT: ld s2, 80(a2) +; ZVE32F-NEXT: ld s3, 96(a2) +; ZVE32F-NEXT: ld a2, 112(a2) +; ZVE32F-NEXT: srli s4, t5, 32 +; ZVE32F-NEXT: sw t5, 0(sp) +; ZVE32F-NEXT: sw s4, 4(sp) +; ZVE32F-NEXT: srli t5, t2, 32 +; ZVE32F-NEXT: sw t2, 8(sp) +; ZVE32F-NEXT: srli t2, s3, 32 +; ZVE32F-NEXT: sw s3, 112(sp) +; ZVE32F-NEXT: sw t2, 116(sp) +; ZVE32F-NEXT: srli t2, a2, 32 +; ZVE32F-NEXT: sw a2, 120(sp) +; ZVE32F-NEXT: sw t2, 124(sp) +; ZVE32F-NEXT: srli a2, t6, 32 +; ZVE32F-NEXT: sw t6, 96(sp) +; ZVE32F-NEXT: sw a2, 100(sp) +; ZVE32F-NEXT: srli a2, s2, 32 +; ZVE32F-NEXT: sw s2, 104(sp) +; ZVE32F-NEXT: sw a2, 108(sp) +; ZVE32F-NEXT: srli a2, t4, 32 +; ZVE32F-NEXT: sw t4, 80(sp) +; ZVE32F-NEXT: sw a2, 84(sp) +; ZVE32F-NEXT: srli a2, t3, 32 +; ZVE32F-NEXT: sw t3, 88(sp) +; ZVE32F-NEXT: sw a2, 92(sp) +; ZVE32F-NEXT: srli a2, t1, 32 +; ZVE32F-NEXT: sw t1, 64(sp) +; ZVE32F-NEXT: sw a2, 68(sp) +; ZVE32F-NEXT: srli a2, t0, 32 +; ZVE32F-NEXT: sw t0, 72(sp) +; ZVE32F-NEXT: sw a2, 76(sp) +; ZVE32F-NEXT: srli a2, a7, 32 +; ZVE32F-NEXT: sw a7, 48(sp) +; ZVE32F-NEXT: sw a2, 52(sp) +; ZVE32F-NEXT: srli a2, a1, 32 +; ZVE32F-NEXT: sw a1, 56(sp) +; ZVE32F-NEXT: sw a2, 60(sp) +; ZVE32F-NEXT: srli a1, a6, 32 +; ZVE32F-NEXT: sw a6, 32(sp) +; ZVE32F-NEXT: sw a1, 36(sp) +; ZVE32F-NEXT: srli a1, a5, 32 +; ZVE32F-NEXT: sw a5, 40(sp) +; ZVE32F-NEXT: sw a1, 44(sp) +; ZVE32F-NEXT: srli a1, a4, 32 +; ZVE32F-NEXT: sw a4, 16(sp) +; ZVE32F-NEXT: sw a1, 20(sp) +; ZVE32F-NEXT: srli a1, a3, 32 +; ZVE32F-NEXT: sw a3, 24(sp) +; ZVE32F-NEXT: sw a1, 28(sp) +; ZVE32F-NEXT: li a1, 32 +; ZVE32F-NEXT: sw t5, 12(sp) +; ZVE32F-NEXT: mv a2, sp +; ZVE32F-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; ZVE32F-NEXT: vle32.v v8, (a2) +; ZVE32F-NEXT: vse32.v v8, (a0) +; ZVE32F-NEXT: addi sp, s0, -256 +; ZVE32F-NEXT: .cfi_def_cfa sp, 256 +; ZVE32F-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; ZVE32F-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; ZVE32F-NEXT: ld s2, 232(sp) # 8-byte Folded Reload +; ZVE32F-NEXT: ld s3, 224(sp) # 8-byte Folded Reload +; ZVE32F-NEXT: ld s4, 216(sp) # 8-byte Folded Reload +; ZVE32F-NEXT: .cfi_restore ra +; ZVE32F-NEXT: .cfi_restore s0 +; ZVE32F-NEXT: .cfi_restore s2 +; ZVE32F-NEXT: .cfi_restore s3 +; ZVE32F-NEXT: .cfi_restore s4 +; ZVE32F-NEXT: addi sp, sp, 256 +; ZVE32F-NEXT: .cfi_def_cfa_offset 0 +; ZVE32F-NEXT: ret +; +; ZIP-LABEL: unzip2a_dual_v16i64: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v10 +; ZIP-NEXT: vsetivli zero, 16, e16, m1, ta, ma +; ZIP-NEXT: vid.v v8 +; ZIP-NEXT: li a0, -256 +; ZIP-NEXT: vadd.vv v8, v8, v8 +; ZIP-NEXT: vmv.s.x v0, a0 +; ZIP-NEXT: vadd.vi v8, v8, -16 +; ZIP-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; ZIP-NEXT: vrgatherei16.vv v16, v12, v8, v0.t +; ZIP-NEXT: vmv.v.v v8, v16 +; ZIP-NEXT: ret +entry: + %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + ret <16 x i64> %c +} + +define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_range(4,4) { +; V-LABEL: 
unzip2a_dual_v4i64_exact: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; V-NEXT: vmv.v.i v0, 8 +; V-NEXT: vslideup.vi v10, v9, 2 +; V-NEXT: vslideup.vi v10, v9, 1, v0.t +; V-NEXT: vmv.v.i v0, 2 +; V-NEXT: vslidedown.vi v8, v8, 1, v0.t +; V-NEXT: vmv.v.i v0, 12 +; V-NEXT: vmerge.vvm v8, v8, v10, v0 +; V-NEXT: ret +; +; ZVE32F-LABEL: unzip2a_dual_v4i64_exact: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: ld a3, 0(a2) +; ZVE32F-NEXT: ld a2, 16(a2) +; ZVE32F-NEXT: ld a4, 0(a1) +; ZVE32F-NEXT: ld a1, 16(a1) +; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu +; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: srli a5, a2, 32 +; ZVE32F-NEXT: srli a6, a3, 32 +; ZVE32F-NEXT: srli a7, a1, 32 +; ZVE32F-NEXT: srli t0, a4, 32 +; ZVE32F-NEXT: vmv.v.x v8, a4 +; ZVE32F-NEXT: vmv.v.x v9, a3 +; ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a6 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a2 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t +; ZVE32F-NEXT: vs1r.v v9, (a0) +; ZVE32F-NEXT: ret +; +; ZIP-LABEL: unzip2a_dual_v4i64_exact: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; ZIP-NEXT: vmv.v.i v0, 8 +; ZIP-NEXT: vslideup.vi v10, v9, 2 +; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t +; ZIP-NEXT: vmv.v.i v0, 12 +; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9 +; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i64> %c +} + +define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscale_range(8,8) { +; V-LABEL: unzip2a_dual_v4i64_exact_nf2: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; V-NEXT: vmv.v.i v0, 8 +; V-NEXT: vslideup.vi v10, v9, 2 +; V-NEXT: vslideup.vi v10, v9, 1, v0.t +; V-NEXT: vmv.v.i v0, 2 +; V-NEXT: vslidedown.vi v8, v8, 1, v0.t +; V-NEXT: vmv.v.i v0, 12 +; V-NEXT: vmerge.vvm v8, v8, v10, v0 +; V-NEXT: ret +; +; ZVE32F-LABEL: unzip2a_dual_v4i64_exact_nf2: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: ld a3, 0(a2) +; ZVE32F-NEXT: ld a2, 16(a2) +; ZVE32F-NEXT: ld a4, 0(a1) +; ZVE32F-NEXT: ld a1, 16(a1) +; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu +; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: srli a5, a2, 32 +; ZVE32F-NEXT: srli a6, a3, 32 +; ZVE32F-NEXT: srli a7, a1, 32 +; ZVE32F-NEXT: srli t0, a4, 32 +; ZVE32F-NEXT: vmv.v.x v8, a4 +; ZVE32F-NEXT: vmv.v.x v9, a3 +; ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a6 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a2 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t +; ZVE32F-NEXT: vse32.v v9, (a0) +; ZVE32F-NEXT: ret +; +; ZIP-LABEL: unzip2a_dual_v4i64_exact_nf2: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; ZIP-NEXT: vmv.v.i v0, 8 +; ZIP-NEXT: vslideup.vi v10, v9, 2 +; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t +; ZIP-NEXT: vmv.v.i v0, 12 +; ZIP-NEXT: ri.vunzip2a.vv v11, v8, v9 +; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZIP-NEXT: ret +entry: + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i64> %c +} + +define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscale_range(4,4) { +; V-LABEL: unzip2a_dual_v16i64_exact: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 4, e64, 
m1, ta, mu +; V-NEXT: vslideup.vi v19, v15, 2 +; V-NEXT: vmv.v.i v16, 8 +; V-NEXT: vmv.v.i v17, 2 +; V-NEXT: vmv.v.i v18, 12 +; V-NEXT: vmv.v.v v0, v16 +; V-NEXT: vslideup.vi v19, v15, 1, v0.t +; V-NEXT: vmv.v.v v0, v17 +; V-NEXT: vslidedown.vi v14, v14, 1, v0.t +; V-NEXT: vmv.v.v v0, v18 +; V-NEXT: vmerge.vvm v15, v14, v19, v0 +; V-NEXT: vslideup.vi v14, v13, 2 +; V-NEXT: vmv.v.v v0, v16 +; V-NEXT: vslideup.vi v14, v13, 1, v0.t +; V-NEXT: vmv.v.v v0, v17 +; V-NEXT: vslidedown.vi v12, v12, 1, v0.t +; V-NEXT: vmv.v.v v0, v18 +; V-NEXT: vmerge.vvm v14, v12, v14, v0 +; V-NEXT: vslideup.vi v12, v11, 2 +; V-NEXT: li a0, -256 +; V-NEXT: vmv.v.v v0, v16 +; V-NEXT: vslideup.vi v12, v11, 1, v0.t +; V-NEXT: vmv.v.v v0, v17 +; V-NEXT: vslidedown.vi v10, v10, 1, v0.t +; V-NEXT: vmv.v.v v0, v18 +; V-NEXT: vmerge.vvm v13, v10, v12, v0 +; V-NEXT: vslideup.vi v10, v9, 2 +; V-NEXT: vmv.v.v v0, v16 +; V-NEXT: vslideup.vi v10, v9, 1, v0.t +; V-NEXT: vmv.v.v v0, v17 +; V-NEXT: vslidedown.vi v8, v8, 1, v0.t +; V-NEXT: vmv.v.v v0, v18 +; V-NEXT: vmerge.vvm v12, v8, v10, v0 +; V-NEXT: vmv.s.x v0, a0 +; V-NEXT: vsetivli zero, 16, e64, m4, ta, ma +; V-NEXT: vmerge.vvm v8, v12, v12, v0 +; V-NEXT: ret +; +; ZVE32F-LABEL: unzip2a_dual_v16i64_exact: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: ld a6, 64(a1) +; ZVE32F-NEXT: ld a4, 80(a1) +; ZVE32F-NEXT: ld a7, 96(a1) +; ZVE32F-NEXT: ld t0, 0(a2) +; ZVE32F-NEXT: ld a3, 16(a2) +; ZVE32F-NEXT: ld t1, 32(a2) +; ZVE32F-NEXT: ld a5, 112(a1) +; ZVE32F-NEXT: srli t2, a7, 32 +; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu +; ZVE32F-NEXT: vmv.v.x v8, a6 +; ZVE32F-NEXT: srli a6, a6, 32 +; ZVE32F-NEXT: vmv.v.x v9, a7 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; ZVE32F-NEXT: vslide1down.vx v9, v9, t2 +; ZVE32F-NEXT: ld a6, 0(a1) +; ZVE32F-NEXT: ld a7, 16(a1) +; ZVE32F-NEXT: ld t2, 32(a1) +; ZVE32F-NEXT: ld a1, 48(a1) +; ZVE32F-NEXT: vmv.v.x v10, a6 +; ZVE32F-NEXT: srli a6, a6, 32 +; ZVE32F-NEXT: vslide1down.vx v10, v10, a6 +; ZVE32F-NEXT: ld a6, 48(a2) +; ZVE32F-NEXT: vmv.v.x v11, t1 +; ZVE32F-NEXT: srli t1, t1, 32 +; ZVE32F-NEXT: vmv.v.x v12, t0 +; ZVE32F-NEXT: srli t0, t0, 32 +; ZVE32F-NEXT: vmv.v.x v13, t2 +; ZVE32F-NEXT: srli t2, t2, 32 +; ZVE32F-NEXT: vslide1down.vx v13, v13, t2 +; ZVE32F-NEXT: vslide1down.vx v12, v12, t0 +; ZVE32F-NEXT: vslide1down.vx v11, v11, t1 +; ZVE32F-NEXT: ld t0, 64(a2) +; ZVE32F-NEXT: ld t1, 80(a2) +; ZVE32F-NEXT: ld t2, 96(a2) +; ZVE32F-NEXT: ld a2, 112(a2) +; ZVE32F-NEXT: vmv.v.x v14, t0 +; ZVE32F-NEXT: srli t0, t0, 32 +; ZVE32F-NEXT: vslide1down.vx v14, v14, t0 +; ZVE32F-NEXT: vmv.v.x v15, t2 +; ZVE32F-NEXT: srli t0, t2, 32 +; ZVE32F-NEXT: vslide1down.vx v15, v15, t0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; ZVE32F-NEXT: srli a4, a4, 32 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; ZVE32F-NEXT: srli a5, a5, 32 +; ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; ZVE32F-NEXT: vslide1down.vx v10, v10, a7 +; ZVE32F-NEXT: srli a4, a7, 32 +; ZVE32F-NEXT: vslide1down.vx v10, v10, a4 +; ZVE32F-NEXT: vslide1down.vx v12, v12, a3 +; ZVE32F-NEXT: srli a3, a3, 32 +; ZVE32F-NEXT: vslide1down.vx v12, v12, a3 +; ZVE32F-NEXT: vmv.v.i v0, 15 +; ZVE32F-NEXT: vslide1down.vx v14, v14, t1 +; ZVE32F-NEXT: srli a3, t1, 32 +; ZVE32F-NEXT: vslide1down.vx v14, v14, a3 +; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t +; ZVE32F-NEXT: vslide1down.vx v8, v13, a1 +; ZVE32F-NEXT: srli a1, a1, 32 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t +; ZVE32F-NEXT: vslide1down.vx v10, v11, a6 +; ZVE32F-NEXT: 
srli a1, a6, 32
+; ZVE32F-NEXT:    vslide1down.vx v10, v10, a1
+; ZVE32F-NEXT:    vslidedown.vi v10, v12, 4, v0.t
+; ZVE32F-NEXT:    vslide1down.vx v11, v15, a2
+; ZVE32F-NEXT:    srli a2, a2, 32
+; ZVE32F-NEXT:    vslide1down.vx v11, v11, a2
+; ZVE32F-NEXT:    vslidedown.vi v11, v14, 4, v0.t
+; ZVE32F-NEXT:    vs4r.v v8, (a0)
+; ZVE32F-NEXT:    ret
+;
+; ZIP-LABEL: unzip2a_dual_v16i64_exact:
+; ZIP:       # %bb.0: # %entry
+; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; ZIP-NEXT:    vslideup.vi v18, v15, 2
+; ZIP-NEXT:    vmv.v.i v16, 8
+; ZIP-NEXT:    vmv.v.i v17, 12
+; ZIP-NEXT:    vslideup.vi v20, v13, 2
+; ZIP-NEXT:    vmv.v.v v0, v16
+; ZIP-NEXT:    vslideup.vi v18, v15, 1, v0.t
+; ZIP-NEXT:    ri.vunzip2a.vv v15, v14, v19
+; ZIP-NEXT:    vmv.v.v v0, v17
+; ZIP-NEXT:    vmerge.vvm v15, v15, v18, v0
+; ZIP-NEXT:    vmv.v.v v0, v16
+; ZIP-NEXT:    vslideup.vi v20, v13, 1, v0.t
+; ZIP-NEXT:    ri.vunzip2a.vv v14, v12, v13
+; ZIP-NEXT:    vslideup.vi v12, v11, 2
+; ZIP-NEXT:    vslideup.vi v18, v9, 2
+; ZIP-NEXT:    vmv.v.v v0, v17
+; ZIP-NEXT:    vmerge.vvm v14, v14, v20, v0
+; ZIP-NEXT:    li a0, -256
+; ZIP-NEXT:    ri.vunzip2a.vv v20, v10, v13
+; ZIP-NEXT:    ri.vunzip2a.vv v10, v8, v19
+; ZIP-NEXT:    vmv.v.v v0, v16
+; ZIP-NEXT:    vslideup.vi v12, v11, 1, v0.t
+; ZIP-NEXT:    vmv.v.v v0, v17
+; ZIP-NEXT:    vmerge.vvm v13, v20, v12, v0
+; ZIP-NEXT:    vmv.v.v v0, v16
+; ZIP-NEXT:    vslideup.vi v18, v9, 1, v0.t
+; ZIP-NEXT:    vmv.v.v v0, v17
+; ZIP-NEXT:    vmerge.vvm v12, v10, v18, v0
+; ZIP-NEXT:    vmv.s.x v0, a0
+; ZIP-NEXT:    vsetivli zero, 16, e64, m4, ta, ma
+; ZIP-NEXT:    vmerge.vvm v8, v12, v12, v0
+; ZIP-NEXT:    ret
+entry:
+  %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i64> %c
+}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index 4a89705..f56cab1 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -191,3 +191,51 @@ define <4 x i32> @popcount32vec(<4 x i32> %0) {
   %13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24>
   ret <4 x i32> %13
 }
+
+define i32 @popcount64_zext(i32 %x) {
+; CHECK-LABEL: @popcount64_zext(
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[ZEXT]])
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
+; CHECK-NEXT:    ret i32 [[TMP13]]
+;
+  %zext = zext i32 %x to i64
+  %1 = lshr i64 %zext, 1
+  %2 = and i64 %1, 1431655765
+  %3 = sub nsw i64 %zext, %2
+  %4 = and i64 %3, 3689348814741910323
+  %5 = lshr i64 %3, 2
+  %6 = and i64 %5, 3689348814741910323
+  %7 = add nuw nsw i64 %6, %4
+  %8 = lshr i64 %7, 4
+  %9 = add nuw nsw i64 %8, %7
+  %10 = and i64 %9, 1085102592571150095
+  %11 = mul i64 %10, 72340172838076673
+  %12 = lshr i64 %11, 56
+  %13 = trunc nuw nsw i64 %12 to i32
+  ret i32 %13
+}
+
+define i32 @popcount64_mask(i64 %x) {
+; CHECK-LABEL: @popcount64_mask(
+; CHECK-NEXT:    [[MASK:%.*]] = and i64 [[X:%.*]], -281470681808896
+; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[MASK]])
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
+; CHECK-NEXT:    ret i32 [[TMP13]]
+;
+  %mask = and i64 %x, -281470681808896 ; 0xffff0000ffff0000
+  %1 = lshr i64 %mask, 1
+  %2 = and i64 %1, 6148820867675914240 ; 0x5555000055550000
+  %3 = sub nsw i64 %mask, %2
+  %4 = and i64 %3, 3689348814741910323
+  %5 = lshr i64 %3, 2
+  %6 = and i64 %5, 3689348814741910323
+  %7 = add nuw nsw
i64 %6, %4 + %8 = lshr i64 %7, 4 + %9 = add nuw nsw i64 %8, %7 + %10 = and i64 %9, 1085102592571150095 + %11 = mul i64 %10, 72340172838076673 + %12 = lshr i64 %11, 56 + %13 = trunc nuw nsw i64 %12 to i32 + ret i32 %13 +} diff --git a/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll b/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll new file mode 100644 index 0000000..1de41e4 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/add-nsw-zext-fold.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p indvars -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" + +declare void @foo(i32) + +define void @add_nsw_zext_fold_results_in_sext(i64 %len) { +; CHECK-LABEL: define void @add_nsw_zext_fold_results_in_sext( +; CHECK-SAME: i64 [[LEN:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[LEN_TRUNC:%.*]] = trunc i64 [[LEN]] to i32 +; CHECK-NEXT: [[LZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[LEN_TRUNC]], i1 false) +; CHECK-NEXT: [[SUB_I:%.*]] = lshr i32 [[LZ]], 3 +; CHECK-NEXT: [[ADD_I:%.*]] = sub i32 5, [[SUB_I]] +; CHECK-NEXT: [[PRECOND:%.*]] = icmp eq i32 [[SUB_I]], 5 +; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[ADD_I]] to i64 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP1]], %[[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[SH_PROM:%.*]] = zext nneg i32 [[IV_NEXT]] to i64 +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 1, [[SH_PROM]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[SHR]] to i32 +; CHECK-NEXT: call void @foo(i32 [[TMP0]]) +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %len.trunc = trunc i64 %len to i32 + %lz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %len.trunc, i1 false) + %sub.i = lshr i32 %lz, 3 + %add.i = sub nuw nsw i32 5, %sub.i + %precond = icmp eq i32 %sub.i, 5 + br i1 %precond, label %exit, label %loop + +loop: + %iv = phi i32 [ %add.i, %entry ], [ %iv.next, %loop ] + %iv.next = add i32 %iv, 1 + %sh_prom = zext nneg i32 %iv.next to i64 + %shr = lshr i64 1, %sh_prom + %2 = trunc nuw nsw i64 %shr to i32 + call void @foo(i32 %2) + %ec = icmp eq i32 %iv.next, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @add_nsw_zext_fold_results_in_sext_known_positive(i32 %mask, ptr %src, i1 %c) { +; CHECK-LABEL: define void @add_nsw_zext_fold_results_in_sext_known_positive( +; CHECK-SAME: i32 [[MASK:%.*]], ptr [[SRC:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C]], i32 0, i32 6 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SPEC_SELECT]], [[MASK]] +; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 [[ADD]], 0 +; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[PH:.*]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 78, [[SPEC_SELECT]] +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw 
nsw i64 [[TMP1]], 1 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP2]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 1 +; CHECK-NEXT: call void @foo(i32 [[L]]) +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %spec.select = select i1 %c, i32 0, i32 6 + %add = add i32 %spec.select, %mask + %precond = icmp slt i32 %add, 0 + br i1 %precond, label %exit, label %ph + +ph: + %start = sub i32 79, %spec.select + br label %loop + +loop: ; preds = %loop, %ph + %iv = phi i32 [ %start, %ph ], [ %dec, %loop ] + %iv.ext = zext i32 %iv to i64 + %gep = getelementptr i32, ptr %src, i64 %iv.ext + %l = load i32, ptr %gep, align 1 + call void @foo(i32 %l) + %dec = add i32 %iv, 0 + br label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll new file mode 100644 index 0000000..df32e60 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/add-nsw-zext-fold.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-idiom -S %s | FileCheck %s + +define void @fold_add_zext_to_sext(ptr %dst, i1 %start) { +; CHECK-LABEL: define void @fold_add_zext_to_sext( +; CHECK-SAME: ptr [[DST:%.*]], i1 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[START_EXT:%.*]] = zext i1 [[START]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[START]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 25, [[START_EXT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext nneg i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SCEVGEP]], i8 0, i64 [[TMP4]], i1 false) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START_EXT]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_EXT]] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 24 +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %start.ext = zext i1 %start to i32 + br label %loop + +loop: + %iv = phi i32 [ %start.ext, %entry ], [ %iv.next, %loop ] + %iv.ext = zext i32 %iv to i64 + %gep = getelementptr i32, ptr %dst, i64 %iv.ext + store i32 0, ptr %gep, align 4 + %iv.next = add i32 %iv, 1 + %ec = icmp ult i32 %iv, 24 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_info_addrx.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_info_addrx.s index ec1ad22..51f6149 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_info_addrx.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_info_addrx.s @@ -1,6 +1,8 @@ # RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o # RUN: llvm-dwarfdump -debug-info %t.o | FileCheck %s # RUN: llvm-dwarfdump -debug-info %t.o -v | FileCheck --check-prefix=VERBOSE %s +# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o -crel +# RUN: llvm-dwarfdump -debug-info %t.o | FileCheck %s # CHECK: DW_TAG_compile_unit # CHECK: DW_AT_low_pc (0x0000000000000000) diff --git 
a/llvm/test/tools/llvm-dwarfdump/X86/debug_info_crel.yaml b/llvm/test/tools/llvm-dwarfdump/X86/debug_info_crel.yaml deleted file mode 100644 index 308f211..0000000 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_info_crel.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# Test llvm-dwarfdump handles CREL relocation sections correctly. - -# RUN: yaml2obj %s | llvm-dwarfdump - | FileCheck %s - -# CHECK: DW_AT_producer ("clang version - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SectionHeaderStringTable: .strtab -Sections: - - Name: .debug_abbrev - Type: SHT_PROGBITS - AddressAlign: 0x1 - Content: 011101252513050325721710171B25111B120673170000022E00111B120640187A196E2503253A0B3B0B49133F19000003240003253E0B0B0B000000 - - Name: .debug_info - Type: SHT_PROGBITS - AddressAlign: 0x1 - Content: '3400000005000108000000000100210001000000000000000002000600000000000000020006000000015703040001330000000305050400' - - Name: .debug_str_offsets - Type: SHT_PROGBITS - AddressAlign: 0x1 - Content: 1C00000005000000000000000000000000000000000000000000000000000000 - - Name: .crel.debug_info - Type: SHT_CREL - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x1 - EntSize: 0x1 - Info: .debug_info - Relocations: - - Offset: 0x11 - Symbol: .debug_str_offsets - Type: R_X86_64_32 - Addend: 8 - - Name: .crel.debug_str_offsets - Type: SHT_CREL - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x1 - EntSize: 0x1 - Info: .debug_str_offsets - Relocations: - - Offset: 0x8 - Symbol: .debug_str - Type: R_X86_64_32 - - Type: SectionHeaderTable - Sections: - - Name: .strtab - - Name: .debug_abbrev - - Name: .debug_info - - Name: .crel.debug_info - - Name: .debug_str_offsets - - Name: .crel.debug_str_offsets - - Name: .debug_str - - Name: .symtab -Symbols: - - Name: foo.cpp - Type: STT_FILE - Index: SHN_ABS - - Name: .debug_abbrev - Type: STT_SECTION - Section: .debug_abbrev - - Name: .debug_str_offsets - Type: STT_SECTION - Section: .debug_str_offsets - - Name: .debug_str - Type: STT_SECTION - Section: .debug_str -DWARF: - debug_str: - - 'clang version 21.0.0git' -... diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp index 3b1fdb6..1a04d70 100644 --- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp @@ -10,10 +10,6 @@ #include "mlir/IR/AffineMap.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinTypeInterfaces.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/LogicalResult.h" #include <numeric> #include <optional> @@ -32,329 +28,67 @@ mlir::getReassociationIndicesForReshape(ShapedType sourceType, return std::nullopt; } -namespace { -/// A simple struct to represent ReassociationIndices as an inclusive interval. -/// It's designed to be feasibly minimal, so the call sites should manage the -/// validity of the range manually. -struct ReassociationIndexRange { - /// FIXME: Signed type is used for consistency with ReassociationIndices. - /// We should consider refactoring all reassociation utilities to use unsigned - /// types. - int64_t leftIdx = 0, rightIdx = 0; - - /// Util for manual checks of the range's validity - LogicalResult verify() const { - return leftIdx >= 0 && (leftIdx <= rightIdx) ? success() : failure(); - } - - /// Checks range's containment within another range. Treats the edges - /// non-exclusively. 
- bool isInRange(const ReassociationIndexRange &outerRange) const { - return leftIdx >= outerRange.leftIdx && rightIdx <= outerRange.rightIdx; - } - - unsigned size() const { - assert(succeeded(verify())); - return rightIdx - leftIdx + 1; - } - bool containsSingleIndex() const { return size() == 1; } - - /// Collects indices that do not overlap between this and another range. - ReassociationIndices - getNonOverlappingIndicesWith(ReassociationIndexRange &rhs) const { - if (rightIdx < rhs.leftIdx) { - // The intervals do not overlap - concatenate the indices from both. - auto jointFullIndices = getFullIndices(); - jointFullIndices.append(rhs.getFullIndices()); - return jointFullIndices; - } - ReassociationIndices result; - // Handle the chunk left of the overlapping range. - int64_t leftStart = std::min(leftIdx, rhs.leftIdx); - int64_t leftEnd = std::max(leftIdx, rhs.leftIdx); - llvm::append_range(result, llvm::seq(leftStart, leftEnd)); - // Handle the chunk right of the overlapping range. Symmetrically, we should - // skip the edge of the overlap AND include the rightmost index. - int64_t rightStart = std::min(rightIdx, rhs.rightIdx) + 1; - int64_t rightEnd = std::max(rightIdx, rhs.rightIdx); - if (rightStart < rightEnd) - llvm::append_range(result, llvm::seq_inclusive(rightStart, rightEnd)); - return result; - } - - /// Converts the range into ReassociationIndices. - ReassociationIndices getFullIndices() const { - ReassociationIndices result; - for (int64_t idx = leftIdx; idx <= rightIdx; ++idx) { - result.push_back(idx); - } - return result; - } -}; -} // namespace - -/// Starting from `sourceStartIdx`, searches `sourceShape` for the first -/// sequence that can be collapsed into a dynamic dimension (at least one must -/// be present in the source). -/// By default, lazily returns once the first dynamic dimension has been found. -/// Setting `matchGreedily` as `true` will also mark all subsequent -/// source dimensions for collapsing into the target. -static FailureOr<ReassociationIndexRange> -findReassociationRangeForDynamicDim(ArrayRef<int64_t> sourceShape, - int64_t sourceStartIdx, - bool matchGreedily = false) { - const unsigned numSourceDims = sourceShape.size(); - ReassociationIndexRange sourceShapeAsRange{0, numSourceDims - 1}; - std::optional<ReassociationIndexRange> resultRange = std::nullopt; - - ReassociationIndexRange iterationRange{sourceStartIdx, sourceStartIdx}; - for (; iterationRange.isInRange(sourceShapeAsRange); - iterationRange.rightIdx++) { - int64_t sourceSize = sourceShape[iterationRange.rightIdx]; - if (sourceSize == ShapedType::kDynamic) { - resultRange = iterationRange; - break; - } - } - if (!resultRange) - return failure(); - if (matchGreedily) - resultRange->rightIdx = sourceShapeAsRange.rightIdx; - return *resultRange; -} +std::optional<SmallVector<ReassociationIndices>> +mlir::getReassociationIndicesForCollapse(ArrayRef<int64_t> sourceShape, + ArrayRef<int64_t> targetShape) { + if (sourceShape.size() <= targetShape.size()) + return std::nullopt; + unsigned sourceDim = 0; + SmallVector<ReassociationIndices> reassociationMap; + reassociationMap.reserve(targetShape.size()); -/// Starting from `sourceStartIdx`, searches `sourceShape` for the first -/// sequence of static dimensions such that their product matches `targetSize`. -/// By default, lazily returns once the product matches the target size. Setting -/// `matchGreedily` as `true` will append all neighboring unit dimensions -/// (dimensions of 1) to the match. 
-static FailureOr<ReassociationIndexRange> -findReassociationRangeForSize(ArrayRef<int64_t> sourceShape, - int64_t sourceStartIdx, int64_t targetSize, - bool matchGreedily = false) { - const unsigned numSourceDims = sourceShape.size(); - ReassociationIndexRange sourceShapeAsRange{0, numSourceDims - 1}; - std::optional<ReassociationIndexRange> resultRange = std::nullopt; - - ReassociationIndexRange iterationRange{sourceStartIdx, sourceStartIdx}; + ReassociationIndices currIndices; int64_t prodOfCollapsedDims = 1; - while (iterationRange.isInRange(sourceShapeAsRange)) { - int64_t sourceSize = sourceShape[iterationRange.rightIdx]; - if (sourceSize == ShapedType::kDynamic) { - // Reassociation for a static dim cannot include a dynamic dim. Reset - // induction variables to essentially restart the loop from the next - // source dimension. - prodOfCollapsedDims = 1; - iterationRange = {iterationRange.rightIdx + 1, - iterationRange.rightIdx + 1}; - continue; - } - prodOfCollapsedDims *= sourceSize; - // If the target size has been exceeded without matching, we need to shift - // the range start right. From the start of the range, roll back the - // multiplication until the target size exceeds the product again. - while (prodOfCollapsedDims > targetSize && - !iterationRange.containsSingleIndex()) { - int64_t frontSourceSize = sourceShape[iterationRange.leftIdx]; - prodOfCollapsedDims /= frontSourceSize; - // Shrink the range rightwards - iterationRange.leftIdx++; - } - // We could've reached the target size with the current dimension, - // also as a result of the above shift to right. - if (prodOfCollapsedDims == targetSize) { - resultRange = iterationRange; + while (sourceDim < sourceShape.size()) { + unsigned targetDim = reassociationMap.size(); + // If we have mapped all the target dimensions stop and handle the remaining + // tail of size-1 dimensions explicitly. + if (targetDim == targetShape.size()) break; - } - // Increment the iteration range - iterationRange.rightIdx++; - } - if (!resultRange) - return failure(); - if (matchGreedily) { - // We now want to collect all unit dimensions directly after the target - // product match. Advance the iterator to avoid OOB when the product match - // happens at the last element. - iterationRange.rightIdx++; - while (iterationRange.isInRange(sourceShapeAsRange) && - sourceShape[iterationRange.rightIdx] == 1) { - resultRange = iterationRange; - iterationRange.rightIdx++; - } - } - return *resultRange; -} -/// Attempts to find a valid collapsing reassociation of `sourceShape` into -/// `targetShape` through a simple traversal. If successful, an array of source -/// index ranges is returned, correspondingly to each dimension in the target -/// shape. The resulting indices shall fully cover the `sourceShape` without -/// overlaps. -/// -/// The algorithm is essentially a lazy one, searching for non-greedy matches - -/// it will only yield a greedy match for the last target dimension. -/// FIXME: The algorithm can only backtrack when it needs to append an offset -/// for a static target dimension to the preceding dynamic one (this retains the -/// linear complexity). 
As feasible, consider adding further backtracking
-/// routines to enable more reassociations, e.g.:
-/// - ?x2x?x2 into ?x2
-static FailureOr<SmallVector<ReassociationIndexRange>>
-findReassociationRangesForCollapse(ArrayRef<int64_t> sourceShape,
-                                   ArrayRef<int64_t> targetShape) {
-  unsigned numSourceDims = sourceShape.size(),
-           numTargetDims = targetShape.size();
-  assert(numSourceDims > numTargetDims);
-  ReassociationIndexRange sourceShapeAsRange{0, numSourceDims - 1};
-
-  SmallVector<ReassociationIndexRange> reassocRanges;
-  reassocRanges.reserve(numTargetDims);
-  // We'll iterate in strides of 2 to enable pseudo-backtracking for simple
-  // cases, e.g.:
-  // - ?x2x3x5 into ?x15
-  std::optional<int64_t> prevTargetSize = std::nullopt;
-  for (unsigned targetDimIdx = 0, sourceDimIdx = 0;
-       targetDimIdx < numTargetDims; ++targetDimIdx) {
-    int64_t targetSize = targetShape[targetDimIdx];
-    // Simply check if there are any subsequent target dimensions left - if
-    // not, the match must be made greedily.
-    bool shouldMatchGreedily = targetDimIdx == numTargetDims - 1;
-    FailureOr<ReassociationIndexRange> sourceRange;
-    if (targetSize == ShapedType::kDynamic) {
-      sourceRange = findReassociationRangeForDynamicDim(
-          sourceShape, sourceDimIdx, shouldMatchGreedily);
-    } else {
-      sourceRange = findReassociationRangeForSize(
-          sourceShape, sourceDimIdx, targetSize, shouldMatchGreedily);
+    int64_t currTargetShape = targetShape[targetDim];
+    while (sourceDim < (sourceShape.size() - 1) &&
+           sourceShape[sourceDim] != ShapedType::kDynamic &&
+           prodOfCollapsedDims * sourceShape[sourceDim] < currTargetShape) {
+      prodOfCollapsedDims *= sourceShape[sourceDim];
+      currIndices.push_back(sourceDim++);
     }
-    // Run sanity checks on the returned index range.
-    if (failed(sourceRange) || failed(sourceRange->verify()) ||
-        !sourceRange->isInRange(sourceShapeAsRange))
-      return failure();
-    if (sourceRange->leftIdx > sourceDimIdx) {
-      // If some source dimensions had to be skipped in order to find a match,
-      // they must be collapsed into the directly preceding dynamic dimension.
-      if (!prevTargetSize || prevTargetSize != ShapedType::kDynamic)
-        return failure();
-      reassocRanges.back().rightIdx = sourceRange->leftIdx - 1;
-    }
-
-    // Store the gathered information as required for the next iteration.
-    prevTargetSize = targetSize;
-    sourceDimIdx = sourceRange->rightIdx + 1;
-    reassocRanges.push_back(*sourceRange);
+    // If the current expanded dimension is dynamic, then the collapsed
+    // dimensions should also be dynamic and the product of all previous
+    // unprocessed dimensions of the expanded shape should be 1.
+    if (sourceShape[sourceDim] == ShapedType::kDynamic &&
+        (currTargetShape != ShapedType::kDynamic || prodOfCollapsedDims != 1))
+      return std::nullopt;
+
+    // If the collapsed dim is dynamic, the current expanded dim should also
+    // be dynamic.
+    if (currTargetShape == ShapedType::kDynamic &&
+        sourceShape[sourceDim] != ShapedType::kDynamic)
+      return std::nullopt;
+
+    // For static shapes, the product of the collapsed dimensions of the
+    // expanded shape must match the collapsed dimension of the target shape.
+    if (prodOfCollapsedDims * sourceShape[sourceDim] != currTargetShape)
+      return std::nullopt;
+
+    currIndices.push_back(sourceDim++);
+    reassociationMap.emplace_back(ReassociationIndices{});
+    std::swap(reassociationMap.back(), currIndices);
+    prodOfCollapsedDims = 1;
   }
-  // Fail if the source shape wasn't a full match for the target shape.
We only - // need to check the last recorded index - any other gaps should have been - // mended by the main loop. - if (reassocRanges.back().rightIdx < sourceShapeAsRange.rightIdx) - return failure(); - return reassocRanges; -} - -/// A variant of `findReassociationRangesForCollapse(...)` that can also scan -/// the shapes right-to-left. -static FailureOr<SmallVector<ReassociationIndexRange>> -findReassociationRangesForCollapse(ArrayRef<int64_t> sourceShape, - ArrayRef<int64_t> targetShape, - bool iterateRightToLeft) { - if (!iterateRightToLeft) - return findReassociationRangesForCollapse(sourceShape, targetShape); - // NB: To iterate right-to-left, we currently reverse the shapes and then - // reverse the result back. The reversed shapes must not be temporary, as - // we're passing through an ArrayRef. - // FIXME: It would be preferable to avoid the expensive copies. At the moment, - // this approach is chosen for readability of the main implementation. - std::vector<int64_t> sourceToReverse = sourceShape.vec(), - targetToReverse = targetShape.vec(); - std::reverse(sourceToReverse.begin(), sourceToReverse.end()); - std::reverse(targetToReverse.begin(), targetToReverse.end()); - auto invertedRanges = - findReassociationRangesForCollapse(sourceToReverse, targetToReverse); - if (failed(invertedRanges)) - return failure(); - SmallVector<ReassociationIndexRange> &rangesToInvert = *invertedRanges; - unsigned numSourceDims = sourceShape.size(); - // We have received the ranges for inverted shapes. Now we have to invert - // the ranges back to correspond with the original source shape. - for (auto &range : rangesToInvert) { - int64_t invLeftIdx = range.leftIdx, invRightIdx = range.rightIdx; - range.leftIdx = numSourceDims - 1 - invRightIdx; - range.rightIdx = numSourceDims - 1 - invLeftIdx; - } - // Also invert the ordering of the ranges to correspond with the original - // target shape. - std::reverse(rangesToInvert.begin(), rangesToInvert.end()); - return rangesToInvert; -} - -std::optional<SmallVector<ReassociationIndices>> -mlir::getReassociationIndicesForCollapse(ArrayRef<int64_t> sourceShape, - ArrayRef<int64_t> targetShape) { - unsigned numSourceDims = sourceShape.size(), - numTargetDims = targetShape.size(); - // We're supposed to search for a collapsing reassociation. If the sizes - // match, there's no actual collapsing taking place - it's either a no-op or a - // `tensor.reshape`-style reassociation (that would be beyond the scope of - // this utility). - if (numSourceDims <= numTargetDims) - return std::nullopt; - // Early handling for scalar target types. - if (numTargetDims == 0) { - ReassociationIndices allSourceIndices; - allSourceIndices.reserve(numSourceDims); - for (unsigned sourceDimIdx = 0; sourceDimIdx < numSourceDims; - ++sourceDimIdx) { - int64_t sourceSize = sourceShape[sourceDimIdx]; - // All source dimensions must be unit or dynamic. - if (sourceSize != 1 && sourceSize != ShapedType::kDynamic) - return std::nullopt; - allSourceIndices.push_back(sourceDimIdx); - } - return SmallVector<ReassociationIndices>{allSourceIndices}; - } - - // Collect source ranges by iterating over the target shape left-to-right. - FailureOr<SmallVector<ReassociationIndexRange>> maybeForwardRanges = - findReassociationRangesForCollapse(sourceShape, targetShape); - if (failed(maybeForwardRanges)) - return std::nullopt; - auto &ranges = *maybeForwardRanges; - // Now do the same in reverse. 
We need to get another valid reassociation - // through some other strategy, and then compare the results in order to - // disambiguate mixed subshapes, such as: - // ?x?x? into ?x?, ?x2x? into ?x?, ?x2x3x6x? into ?x6x? - // This leads us to lose some of the reassociation opportunities that can only - // be found by iterating in a certain direction, e.g. 2x2x? into 2x? - without - // backtracking, the algorithm will fail right-to-left. However, this is the - // best way to preserve correctness. - FailureOr<SmallVector<ReassociationIndexRange>> maybeReverseRanges = - findReassociationRangesForCollapse(sourceShape, targetShape, - /*iterateRightToLeft=*/true); - if (failed(maybeReverseRanges)) - return std::nullopt; - auto &reverseRanges = *maybeReverseRanges; - - if (ranges.size() != numTargetDims || reverseRanges.size() != numTargetDims) + // All the dimensions in the target must have been processed. + if (reassociationMap.size() != targetShape.size()) return std::nullopt; - // Now we can check for ambiguity of each target dimension's reassociation. If - // successful, we put the full indices into our result map for the target - // shape. - SmallVector<ReassociationIndices> reassociationMap(numTargetDims); - for (unsigned targetDimIdx = 0; targetDimIdx < numTargetDims; - ++targetDimIdx) { - ReassociationIndexRange &range = ranges[targetDimIdx]; - ReassociationIndexRange &reverseRange = reverseRanges[targetDimIdx]; - // Get non-overlapping indices between the ranges - ReassociationIndices nonMatchingIndices = - range.getNonOverlappingIndicesWith(reverseRange); - // Unit dimensions can be collapsed wherever - this is the only ambiguity - // that we allow. - for (int64_t sourceDimIdx : nonMatchingIndices) { - if (sourceShape[sourceDimIdx] != 1) - return std::nullopt; - } - reassociationMap[targetDimIdx] = range.getFullIndices(); + // Process any remaining entries in the source shape. They all need to be + // 1 or dynamic. + for (; sourceDim < sourceShape.size(); sourceDim++) { + if (sourceShape[sourceDim] != ShapedType::kDynamic && + sourceShape[sourceDim] != 1) + return std::nullopt; + // The map is empty when the target type is a scalar. 
+ if (!reassociationMap.empty()) + reassociationMap.back().push_back(sourceDim); } return reassociationMap; } diff --git a/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir index 6979770..51350e5 100644 --- a/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir +++ b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir @@ -158,8 +158,8 @@ func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> { // ----- // CHECK-LABEL: func.func @unpack_dynamic -// CHECK: tensor.collapse -// CHECK-NOT: linalg.unpack +// CHECK-NOT: tensor.collapse +// CHECK: linalg.unpack func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> { %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 646b219..0abec7e 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -1117,7 +1117,7 @@ func.func @fold_expand_of_collapse(%arg0 : tensor<3x4x4xf32>) -> tensor<3x4x4xf3 // ----- -func.func @fold_expand_of_collapse_mixed_subshape(%arg0 : tensor<?x4x?xf32>, %arg1: index, %arg2: index) +func.func @fold_expand_of_collapse_dynamic(%arg0 : tensor<?x4x?xf32>, %arg1: index, %arg2: index) -> tensor<?x4x?xf32> { %0 = tensor.collapse_shape %arg0 [[0, 1], [2]] : tensor<?x4x?xf32> into tensor<?x?xf32> @@ -1125,28 +1125,12 @@ func.func @fold_expand_of_collapse_mixed_subshape(%arg0 : tensor<?x4x?xf32>, %ar : tensor<?x?xf32> into tensor<?x4x?xf32> return %1 : tensor<?x4x?xf32> } -// CHECK-LABEL: @fold_expand_of_collapse_mixed_subshape +// CHECK-LABEL: @fold_expand_of_collapse_dynamic // CHECK-NOT: tensor.{{.*}}_shape // ----- -func.func @fold_expand_of_collapse_mixed_target_subshape(%arg0 : tensor<?x4x?x2xf32>, %arg1: index, %arg2: index) - -> tensor<?x4x?xf32> { - %0 = tensor.collapse_shape %arg0 [[0, 1], [2, 3]] - : tensor<?x4x?x2xf32> into tensor<?x?xf32> - %1 = tensor.expand_shape %0 [[0, 1], [2]] output_shape [%arg1, 4, %arg2] - : tensor<?x?xf32> into tensor<?x4x?xf32> - return %1 : tensor<?x4x?xf32> -} -// CHECK-LABEL: @fold_expand_of_collapse_mixed_target_subshape -// CHECK-NOT: tensor.expand_shape -// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0 {{\[}}[0], [1], [2, 3]] -// CHECK-SAME: : tensor<?x4x?x2xf32> into tensor<?x4x?xf32> -// CHECK-NEXT: return %[[COLLAPSE]] - -// ----- - -func.func @no_fold_expand_of_collapse_fully_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1: index, %arg2: index, %arg3: index) +func.func @no_fold_expand_of_collapse_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1: index, %arg2: index, %arg3: index) -> tensor<?x?x?xf32> { %0 = tensor.collapse_shape %arg0 [[0, 1], [2]] : tensor<?x?x?xf32> into tensor<?x?xf32> @@ -1154,22 +1138,7 @@ func.func @no_fold_expand_of_collapse_fully_dynamic(%arg0 : tensor<?x?x?xf32>, % : tensor<?x?xf32> into tensor<?x?x?xf32> return %1 : tensor<?x?x?xf32> } -// CHECK-LABEL: @no_fold_expand_of_collapse_fully_dynamic -// CHECK: tensor.collapse_shape -// CHECK: %[[EXPAND:.+]] = tensor.expand_shape -// CHECK: return %[[EXPAND]] - -// ----- - -func.func @no_fold_expand_of_collapse_adjacent_dynamic(%arg0 : tensor<?x?x?xf32>, %arg1: index, %arg2: index) - -> tensor<?x?xf32> { - %0 = tensor.collapse_shape %arg0 [[0, 1, 2]] - : tensor<?x?x?xf32> into tensor<?xf32> - %1 = tensor.expand_shape %0 [[0, 1]] output_shape [%arg1, %arg2] - : tensor<?xf32> into tensor<?x?xf32> - return %1 : tensor<?x?xf32> -} -// CHECK-LABEL: 
@no_fold_expand_of_collapse_adjacent_dynamic +// CHECK-LABEL: @no_fold_expand_of_collapse_dynamic // CHECK: tensor.collapse_shape // CHECK: %[[EXPAND:.+]] = tensor.expand_shape // CHECK: return %[[EXPAND]] diff --git a/mlir/unittests/Dialect/Utils/CMakeLists.txt b/mlir/unittests/Dialect/Utils/CMakeLists.txt index e921c8b..61b9cdc 100644 --- a/mlir/unittests/Dialect/Utils/CMakeLists.txt +++ b/mlir/unittests/Dialect/Utils/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_unittest(MLIRDialectUtilsTests StructuredOpsUtilsTest.cpp - ReshapeOpsUtilsTest.cpp IndexingUtilsTest.cpp ) mlir_target_link_libraries(MLIRDialectUtilsTests diff --git a/mlir/unittests/Dialect/Utils/ReshapeOpsUtilsTest.cpp b/mlir/unittests/Dialect/Utils/ReshapeOpsUtilsTest.cpp deleted file mode 100644 index db1a87a..0000000 --- a/mlir/unittests/Dialect/Utils/ReshapeOpsUtilsTest.cpp +++ /dev/null @@ -1,203 +0,0 @@ -//===- ReshapeOpsUtilsTest.cpp - ReshapeOpsUtils unit tests ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Utils/ReshapeOpsUtils.h" -#include "mlir/IR/BuiltinTypeInterfaces.h" -#include "llvm/ADT/STLExtras.h" -#include "gtest/gtest.h" -#include <optional> - -using namespace mlir; - -/// Helper to make constructing -/// `std::optional<SmallVector<ReassociationIndices>>` more readable. -static std::optional<SmallVector<ReassociationIndices>> -makeOptionalIndices(std::initializer_list<ReassociationIndices> list) { - return std::optional<SmallVector<ReassociationIndices>>(list); -} - -TEST(ReassociationIndicesForCollapse, ScalarTest) { - EXPECT_EQ(getReassociationIndicesForCollapse({1}, {}), - makeOptionalIndices({{0}})); - EXPECT_EQ(getReassociationIndicesForCollapse({1, 1}, {}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse({ShapedType::kDynamic}, {}), - makeOptionalIndices({{0}})); - EXPECT_EQ(getReassociationIndicesForCollapse({1, ShapedType::kDynamic, - ShapedType::kDynamic, 1, - ShapedType::kDynamic}, - {}), - makeOptionalIndices({{0, 1, 2, 3, 4}})); -} - -TEST(ReassociationIndicesForCollapse, ScalarTestFailure) { - EXPECT_EQ(getReassociationIndicesForCollapse({}, {}), std::nullopt); - EXPECT_EQ(getReassociationIndicesForCollapse({}, {1}), std::nullopt); - EXPECT_EQ(getReassociationIndicesForCollapse({2}, {}), std::nullopt); - EXPECT_EQ( - getReassociationIndicesForCollapse({1, 2, ShapedType::kDynamic, 1}, {}), - std::nullopt); -} - -TEST(ReassociationIndicesForCollapse, StaticTest) { - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20}, {200}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20, 30}, {10, 600}), - makeOptionalIndices({{0}, {1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20, 30}, {200, 30}), - makeOptionalIndices({{0, 1}, {2}})); -} - -TEST(ReassociationIndicesForCollapse, StaticTestFailure) { - // No-op reassociation - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20}, {10, 20}), - std::nullopt); - // Invalid static reassociations - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20}, {10}), std::nullopt); - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20, 30}, {200, 300}), - std::nullopt); - // Non-collapsing (expanding) reassociation - EXPECT_EQ(getReassociationIndicesForCollapse({10, 20, 30}, {1, 10, 20, 30}), - 
std::nullopt); -} - -TEST(ReassociationIndicesForCollapse, StaticTestUnitDims) { - EXPECT_EQ(getReassociationIndicesForCollapse({10, 1}, {10}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse({1, 20, 30}, {600}), - makeOptionalIndices({{0, 1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse({1, 1, 1}, {1}), - makeOptionalIndices({{0, 1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse({1, 1, 1, 1}, {1, 1, 1}), - makeOptionalIndices({{0}, {1}, {2, 3}})); -} - -TEST(ReassociationIndicesForCollapse, DynamicTest) { - EXPECT_EQ(getReassociationIndicesForCollapse({ShapedType::kDynamic, 1}, - {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse({ShapedType::kDynamic, 1, 1}, - {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {1, ShapedType::kDynamic, 1, ShapedType::kDynamic, 1}, - {ShapedType::kDynamic, ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}, {2, 3, 4}})); - EXPECT_EQ( - getReassociationIndicesForCollapse( - {ShapedType::kDynamic, ShapedType::kDynamic}, {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {1, ShapedType::kDynamic, ShapedType::kDynamic}, - {1, ShapedType::kDynamic}), - makeOptionalIndices({{0}, {1, 2}})); - - EXPECT_EQ(getReassociationIndicesForCollapse({ShapedType::kDynamic, 10}, - {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {1, ShapedType::kDynamic, ShapedType::kDynamic}, - {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse({10, ShapedType::kDynamic}, - {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {ShapedType::kDynamic, 1, 2, ShapedType::kDynamic, 10}, - {ShapedType::kDynamic, 10}), - makeOptionalIndices({{0, 1, 2, 3}, {4}})); - EXPECT_EQ(getReassociationIndicesForCollapse({ShapedType::kDynamic, 10, 20}, - {ShapedType::kDynamic, 20}), - makeOptionalIndices({{0, 1}, {2}})); - EXPECT_EQ(getReassociationIndicesForCollapse({10, ShapedType::kDynamic, 20}, - {ShapedType::kDynamic, 20}), - makeOptionalIndices({{0, 1}, {2}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {ShapedType::kDynamic, 3, 2, 5, 2}, {ShapedType::kDynamic, 20}), - makeOptionalIndices({{0, 1}, {2, 3, 4}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {10, ShapedType::kDynamic, 20, ShapedType::kDynamic, 1}, - {ShapedType::kDynamic, 20, ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}, {2}, {3, 4}})); - EXPECT_EQ(getReassociationIndicesForCollapse({1, ShapedType::kDynamic, 1}, - {ShapedType::kDynamic}), - makeOptionalIndices({{0, 1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {ShapedType::kDynamic, ShapedType::kDynamic, 1}, - {ShapedType::kDynamic, ShapedType::kDynamic}), - makeOptionalIndices({{0}, {1, 2}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {1, ShapedType::kDynamic, ShapedType::kDynamic}, - {ShapedType::kDynamic, ShapedType::kDynamic}), - makeOptionalIndices({{0, 1}, {2}})); - EXPECT_EQ(getReassociationIndicesForCollapse( - {ShapedType::kDynamic, 1, ShapedType::kDynamic}, - {ShapedType::kDynamic, ShapedType::kDynamic}), - makeOptionalIndices({{0}, {1, 2}})); -} - -TEST(ReassociationIndicesForCollapse, DynamicTestFailure) { - EXPECT_EQ(getReassociationIndicesForCollapse({ShapedType::kDynamic, 10, 20}, - {ShapedType::kDynamic, 10}), - std::nullopt); - 
EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 10, ShapedType::kDynamic},
- {ShapedType::kDynamic, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {20, ShapedType::kDynamic, 10, ShapedType::kDynamic},
- {ShapedType::kDynamic, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 5, 3, 2, 2}, {ShapedType::kDynamic, 20}),
- std::nullopt);
- EXPECT_EQ(
- getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, ShapedType::kDynamic, ShapedType::kDynamic},
- {ShapedType::kDynamic, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, ShapedType::kDynamic, 10, 1,
- ShapedType::kDynamic},
- {ShapedType::kDynamic, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 10, 10, 10, ShapedType::kDynamic},
- {ShapedType::kDynamic, 10, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 10, 10, 10, ShapedType::kDynamic},
- {ShapedType::kDynamic, 2, 2, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 3, 4, 3, ShapedType::kDynamic},
- {ShapedType::kDynamic, 12, ShapedType::kDynamic}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 8, 4, 2, 16, ShapedType::kDynamic},
- {ShapedType::kDynamic, 32, ShapedType::kDynamic}),
- std::nullopt);
-
- //===----------------------------------------------------------------------===//
- // TODO: Reassociation for the following examples can be computed, but isn't
- // supported by `getReassociationIndicesForCollapse`.
- //===----------------------------------------------------------------------===//
-
- // TODO: Fails because there's no backtracking when some source dimensions
- // remain unmatched at either edge.
- EXPECT_EQ(getReassociationIndicesForCollapse(
- {ShapedType::kDynamic, 10, ShapedType::kDynamic, 10},
- {ShapedType::kDynamic, 10}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse({1, ShapedType::kDynamic, 2, 2},
- {1, ShapedType::kDynamic, 2}),
- std::nullopt);
- EXPECT_EQ(getReassociationIndicesForCollapse({2, 2, ShapedType::kDynamic, 1},
- {2, ShapedType::kDynamic}),
- std::nullopt);
-}
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index dcbfc8f1..5628a22 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -960,7 +960,10 @@ cc_library(
 "include/llvm/Analysis/WithCache.h",
 ] + [":llvm_intrinsics_headers"],
 copts = llvm_copts,
- textual_hdrs = glob(["include/llvm/IR/*.def"]),
+ textual_hdrs = glob([
+ "include/llvm/IR/*.def",
+ "lib/IR/*.def",
+ ]),
 deps = [
 ":BinaryFormat",
 ":Demangle",
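For context, a minimal sketch of how the restored greedy left-to-right matcher in getReassociationIndicesForCollapse behaves. It is modeled on the unit-test file deleted above (same headers and call style); the helper name illustrateGreedyCollapse and the specific assertions are illustrative only, not part of this commit.

#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include <cassert>
#include <optional>

using namespace mlir;

// Illustrative only: exercises the collapse-reassociation utility after the
// revert to the greedy left-to-right scan.
static void illustrateGreedyCollapse() {
  // Static case: scanning left to right, 10 matches the first target dim and
  // 20x30 folds into 600, yielding {{0}, {1, 2}}.
  std::optional<SmallVector<ReassociationIndices>> staticRes =
      getReassociationIndicesForCollapse({10, 20, 30}, {10, 600});
  assert(staticRes && staticRes->size() == 2);

  // Dynamic case: once the dynamic source dim is mapped onto the dynamic
  // target dim, the trailing static 32 is neither 1 nor dynamic, so the
  // greedy scan gives up and returns std::nullopt. This appears to be what
  // flips the @unpack_dynamic test above from tensor.collapse_shape back to
  // linalg.unpack.
  std::optional<SmallVector<ReassociationIndices>> dynRes =
      getReassociationIndicesForCollapse({ShapedType::kDynamic, 32},
                                         {ShapedType::kDynamic});
  assert(!dynRes);
}

The trade-off of the revert is visible in the deleted DynamicTest expectations above: reassociations that required the range-based matcher's pseudo-backtracking (e.g. ?x3x2x5x2 into ?x20) are presumably no longer recovered by the simpler greedy scan, in exchange for rejecting the ambiguous dynamic cases it previously mis-folded.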