54 files changed, 1200 insertions, 799 deletions
diff --git a/clang/docs/ClangOffloadPackager.rst b/clang/docs/ClangOffloadPackager.rst
deleted file mode 100644
index 481069b..0000000
--- a/clang/docs/ClangOffloadPackager.rst
+++ /dev/null
@@ -1,193 +0,0 @@
-======================
-Clang Offload Packager
-======================
-
-.. contents::
-   :local:
-
-.. _clang-offload-packager:
-
-Introduction
-============
-
-This tool bundles device files into a single image containing necessary
-metadata. We use a custom binary format for bundling all the device images
-together. The image format is a small header wrapping around a string map. This
-tool creates bundled binaries so that they can be embedded into the host to
-create a fat-binary.
-
-Binary Format
-=============
-
-The binary format is marked by the ``0x10FF10AD`` magic bytes, followed by a
-version. Each created binary contains its own magic bytes. This allows us to
-locate all the embedded offloading sections even after they may have been merged
-by the linker, such as when using relocatable linking. Conceptually, this binary
-format is a serialization of a string map and an image buffer. The binary header
-is described in the following :ref:`table<table-binary_header>`.
-
-.. table:: Offloading Binary Header
-    :name: table-binary_header
-
-    +----------+--------------+----------------------------------------------------+
-    |   Type   |  Identifier  | Description                                        |
-    +==========+==============+====================================================+
-    | uint8_t  |    magic     | The magic bytes for the binary format (0x10FF10AD) |
-    +----------+--------------+----------------------------------------------------+
-    | uint32_t |   version    | Version of this format (currently version 1)       |
-    +----------+--------------+----------------------------------------------------+
-    | uint64_t |    size      | Size of this binary in bytes                       |
-    +----------+--------------+----------------------------------------------------+
-    | uint64_t | entry offset | Absolute offset of the offload entries in bytes    |
-    +----------+--------------+----------------------------------------------------+
-    | uint64_t |  entry size  | Size of the offload entries in bytes               |
-    +----------+--------------+----------------------------------------------------+
-
-Once identified through the magic bytes, we use the size field to take a slice
-of the binary blob containing the information for a single offloading image. We
-can then use the offset field to find the actual offloading entries containing
-the image and metadata. The offload entry contains information about the device
-image. It contains the fields shown in the following
-:ref:`table<table-binary_entry>`.
-
-.. table:: Offloading Entry Table
-    :name: table-binary_entry
-
-    +----------+---------------+----------------------------------------------------+
-    |   Type   |   Identifier  | Description                                        |
-    +==========+===============+====================================================+
-    | uint16_t |  image kind   | The kind of the device image (e.g. bc, cubin)      |
-    +----------+---------------+----------------------------------------------------+
-    | uint16_t | offload kind  | The producer of the image (e.g. openmp, cuda)      |
-    +----------+---------------+----------------------------------------------------+
-    | uint32_t |     flags     | Generic flags for the image                        |
-    +----------+---------------+----------------------------------------------------+
-    | uint64_t | string offset | Absolute offset of the string metadata table       |
-    +----------+---------------+----------------------------------------------------+
-    | uint64_t |  num strings  | Number of string entries in the table              |
-    +----------+---------------+----------------------------------------------------+
-    | uint64_t |  image offset | Absolute offset of the device image in bytes       |
-    +----------+---------------+----------------------------------------------------+
-    | uint64_t |   image size  | Size of the device image in bytes                  |
-    +----------+---------------+----------------------------------------------------+
-
-This table contains the offsets of the string table and the device image itself
-along with some other integer information. The image kind lets us easily
-identify the type of image stored here without needing to inspect the binary.
-The offloading kind is used to determine which registration code or linking
-semantics are necessary for this image. These are stored as enumerations with
-the following values for the :ref:`offload kind<table-offload_kind>` and the
-:ref:`image kind<table-image_kind>`.
-
-.. table:: Image Kind
-    :name: table-image_kind
-
-    +---------------+-------+---------------------------------------+
-    |      Name     | Value | Description                           |
-    +===============+=======+=======================================+
-    | IMG_None      | 0x00  | No image information provided         |
-    +---------------+-------+---------------------------------------+
-    | IMG_Object    | 0x01  | The image is a generic object file    |
-    +---------------+-------+---------------------------------------+
-    | IMG_Bitcode   | 0x02  | The image is an LLVM-IR bitcode file  |
-    +---------------+-------+---------------------------------------+
-    | IMG_Cubin     | 0x03  | The image is a CUDA object file       |
-    +---------------+-------+---------------------------------------+
-    | IMG_Fatbinary | 0x04  | The image is a CUDA fatbinary file    |
-    +---------------+-------+---------------------------------------+
-    | IMG_PTX       | 0x05  | The image is a CUDA PTX file          |
-    +---------------+-------+---------------------------------------+
-
-.. table:: Offload Kind
-    :name: table-offload_kind
-
-    +------------+-------+---------------------------------------+
-    |      Name  | Value | Description                           |
-    +============+=======+=======================================+
-    | OFK_None   | 0x00  | No offloading information provided    |
-    +------------+-------+---------------------------------------+
-    | OFK_OpenMP | 0x01  | The producer was OpenMP offloading    |
-    +------------+-------+---------------------------------------+
-    | OFK_CUDA   | 0x02  | The producer was CUDA                 |
-    +------------+-------+---------------------------------------+
-    | OFK_HIP    | 0x03  | The producer was HIP                  |
-    +------------+-------+---------------------------------------+
-    | OFK_SYCL   | 0x04  | The producer was SYCL                 |
-    +------------+-------+---------------------------------------+
-
-The flags are used to signify certain conditions, such as the presence of
-debugging information or whether or not LTO was used. The string entry table is
-used to generically contain any arbitrary key-value pair. This is stored as an
-array of the :ref:`string entry<table-binary_string>` format.
-
-.. table:: Offloading String Entry
-    :name: table-binary_string
-
-    +----------+--------------+-------------------------------------------------------+
-    |   Type   |   Identifier | Description                                           |
-    +==========+==============+=======================================================+
-    | uint64_t |  key offset  | Absolute byte offset of the key in the string table   |
-    +----------+--------------+-------------------------------------------------------+
-    | uint64_t | value offset | Absolute byte offset of the value in the string table |
-    +----------+--------------+-------------------------------------------------------+
-
-The string entries simply provide offsets to a key and value pair in the
-binary images string table. The string table is simply a collection of null
-terminated strings with defined offsets in the image. The string entry allows us
-to create a key-value pair from this string table. This is used for passing
-arbitrary arguments to the image, such as the triple and architecture.
-
-All of these structures are combined to form a single binary blob, the order
-does not matter because of the use of absolute offsets. This makes it easier to
-extend in the future. As mentioned previously, multiple offloading images are
-bundled together by simply concatenating them in this format. Because we have
-the magic bytes and size of each image, we can extract them as-needed.
-
-Usage
-=====
-
-This tool can be used with the following arguments. Generally information is
-passed as a key-value pair to the ``image=`` argument. The ``file`` and
-``triple``, arguments are considered mandatory to make a valid image.
-The ``arch`` argument is suggested.
-
-.. code-block:: console
-
-  OVERVIEW: A utility for bundling several object files into a single binary.
-  The output binary can then be embedded into the host section table
-  to create a fatbinary containing offloading code.
-
-  USAGE: clang-offload-packager [options]
-
-  OPTIONS:
-
-  Generic Options:
-
-    --help                      - Display available options (--help-hidden for more)
-    --help-list                 - Display list of available options (--help-list-hidden for more)
-    --version                   - Display the version of this program
-
-  clang-offload-packager options:
-
-    --image=<<key>=<value>,...> - List of key and value arguments. Required
-                                  keywords are 'file' and 'triple'.
-    -o <file>                   - Write output to <file>.
-
-Example
-=======
-
-This tool simply takes many input files from the ``image`` option and creates a
-single output file with all the images combined.
-
-.. code-block:: console
-
-  clang-offload-packager -o out.bin --image=file=input.o,triple=nvptx64,arch=sm_70
-
-The inverse operation can be performed instead by passing the packaged binary as
-input. In this mode the matching images will either be placed in the output
-specified by the ``file`` option. If no ``file`` argument is provided a name
-will be generated for each matching image.
-
-.. code-block:: console
-
-  clang-offload-packager in.bin --image=file=output.o,triple=nvptx64,arch=sm_70
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index d4ffcf3..66c4c04 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -278,22 +278,22 @@ def CIR_PtrStrideOp : CIR_Op<"ptr_stride", [
 def CIR_ConstantOp : CIR_Op<"const", [
   ConstantLike, Pure, AllTypesMatch<["value", "res"]>
 ]> {
-  let summary = "Defines a CIR constant";
+  let summary = "Create a CIR constant from a literal attribute";
   let description = [{
     The `cir.const` operation turns a literal into an SSA value. The data is
     attached to the operation as an attribute.
 
     ```mlir
-      %0 = cir.const 42 : i32
-      %1 = cir.const 4.2 : f32
-      %2 = cir.const nullptr : !cir.ptr<i32>
+      %0 = cir.const #cir.int<4> : !u32i
+      %1 = cir.const #cir.fp<1.500000e+00> : !cir.float
+      %2 = cir.const #cir.ptr<null> : !cir.ptr<!void>
     ```
   }];
 
   let arguments = (ins TypedAttrInterface:$value);
   let results = (outs CIR_AnyType:$res);
 
-  let assemblyFormat = "attr-dict $value";
+  let assemblyFormat = "$value attr-dict";
 
   let hasVerifier = 1;
 
@@ -3277,9 +3277,9 @@ def CIR_ComplexCreateOp : CIR_Op<"complex.create", [Pure, SameTypeOperands]> {
 def CIR_ComplexRealOp : CIR_Op<"complex.real", [Pure]> {
   let summary = "Extract the real part of a complex value";
   let description = [{
-    `cir.complex.real` operation takes an operand of `!cir.complex`, `!cir.int`
-    or `!cir.float`. If the operand is `!cir.complex`, the real part of it will
-    be returned, otherwise the value returned unmodified. 
+    `cir.complex.real` operation takes an operand of `!cir.complex`, `cir.int`, 
+    `!cir.bool` or `!cir.float`. If the operand is `!cir.complex`, the real 
+    part of it will be returned, otherwise the value returned unmodified. 
 
     Example:
 
@@ -3289,8 +3289,8 @@ def CIR_ComplexRealOp : CIR_Op<"complex.real", [Pure]> {
     ```
   }];
 
-  let results = (outs CIR_AnyIntOrFloatType:$result);
-  let arguments = (ins CIR_AnyComplexOrIntOrFloatType:$operand);
+  let results = (outs CIR_AnyIntOrBoolOrFloatType:$result);
+  let arguments = (ins CIR_AnyComplexOrIntOrBoolOrFloatType:$operand);
 
   let assemblyFormat = [{
     $operand `:` qualified(type($operand)) `->` qualified(type($result))
@@ -3309,8 +3309,8 @@ def CIR_ComplexImagOp : CIR_Op<"complex.imag", [Pure]> {
   let summary = "Extract the imaginary part of a complex value";
   let description = [{
     `cir.complex.imag` operation takes an operand of `!cir.complex`, `!cir.int`
-    or `!cir.float`. If the operand is `!cir.complex`, the imag part of it will
-    be returned, otherwise a zero value will be returned.  
+    `!cir.bool` or `!cir.float`. If the operand is `!cir.complex`, the imag 
+    part of it will be returned, otherwise a zero value will be returned.  
 
     Example:
 
@@ -3320,8 +3320,8 @@ def CIR_ComplexImagOp : CIR_Op<"complex.imag", [Pure]> {
     ```
   }];
 
-  let results = (outs CIR_AnyIntOrFloatType:$result);
-  let arguments = (ins CIR_AnyComplexOrIntOrFloatType:$operand);
+  let results = (outs CIR_AnyIntOrBoolOrFloatType:$result);
+  let arguments = (ins CIR_AnyComplexOrIntOrBoolOrFloatType:$operand);
 
   let assemblyFormat = [{
     $operand `:` qualified(type($operand)) `->` qualified(type($result))
@@ -4169,6 +4169,40 @@ def CIR_ThrowOp : CIR_Op<"throw"> {
 }
 
 //===----------------------------------------------------------------------===//
+// AllocExceptionOp
+//===----------------------------------------------------------------------===//
+
+def CIR_AllocExceptionOp : CIR_Op<"alloc.exception"> {
+  let summary = "Allocates an exception according to Itanium ABI";
+  let description = [{
+    Implements a slightly higher level __cxa_allocate_exception:
+
+    `void *__cxa_allocate_exception(size_t thrown_size);`
+
+    If the operation fails, the program terminates rather than throw.
+
+    Example:
+
+    ```mlir
+    // if (b == 0) {
+    //   ...
+    //   throw "...";
+    cir.if %10 {
+        %11 = cir.alloc_exception 8 -> !cir.ptr<!void>
+        ... // store exception content into %11
+        cir.throw %11 : !cir.ptr<!cir.ptr<!u8i>>, ...
+    ```
+  }];
+
+  let arguments = (ins I64Attr:$size);
+  let results = (outs Res<CIR_PointerType, "", [MemAlloc<DefaultResource>]>:$addr);
+
+  let assemblyFormat = [{
+    $size `->` qualified(type($addr)) attr-dict
+  }];
+}
+
+//===----------------------------------------------------------------------===//
 // Atomic operations
 //===----------------------------------------------------------------------===//
 
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
index da03a29..a1ebd6c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
@@ -159,16 +159,22 @@ def CIR_AnyIntOrFloatType : AnyTypeOf<[CIR_AnyFloatType, CIR_AnyIntType],
     let cppFunctionName = "isAnyIntegerOrFloatingPointType";
 }
 
+def CIR_AnyIntOrBoolOrFloatType
+    : AnyTypeOf<[CIR_AnyBoolType, CIR_AnyFloatType, CIR_AnyIntType],
+                "integer, boolean or floating point type"> {
+  let cppFunctionName = "isAnyIntegerOrBooleanOrFloatingPointType";
+}
+
 //===----------------------------------------------------------------------===//
 // Complex Type predicates
 //===----------------------------------------------------------------------===//
 
 def CIR_AnyComplexType : CIR_TypeBase<"::cir::ComplexType", "complex type">;
 
-def CIR_AnyComplexOrIntOrFloatType : AnyTypeOf<[
-    CIR_AnyComplexType, CIR_AnyFloatType, CIR_AnyIntType
-], "complex, integer or floating point type"> {
-    let cppFunctionName = "isComplexOrIntegerOrFloatingPointType";
+def CIR_AnyComplexOrIntOrBoolOrFloatType
+    : AnyTypeOf<[CIR_AnyComplexType, CIR_AnyIntOrBoolOrFloatType],
+                "complex, integer or floating point type"> {
+  let cppFunctionName = "isComplexOrIntegerOrBoolOrFloatingPointType";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index d34a414..59bbd0f 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -1202,12 +1202,12 @@ class Sema;
 
       /// Would use of this function result in a rewrite using a different
       /// operator?
-      bool isRewrittenOperator(const FunctionDecl *FD) {
+      bool isRewrittenOperator(const FunctionDecl *FD) const {
         return OriginalOperator &&
                FD->getDeclName().getCXXOverloadedOperator() != OriginalOperator;
       }
 
-      bool isAcceptableCandidate(const FunctionDecl *FD) {
+      bool isAcceptableCandidate(const FunctionDecl *FD) const {
         if (!OriginalOperator)
           return true;
 
@@ -1234,7 +1234,7 @@ class Sema;
       }
       /// Determines whether this operator could be implemented by a function
       /// with reversed parameter order.
-      bool isReversible() {
+      bool isReversible() const {
         return AllowRewrittenCandidates && OriginalOperator &&
                (getRewrittenOverloadedOperator(OriginalOperator) != OO_None ||
                 allowsReversed(OriginalOperator));
@@ -1242,13 +1242,13 @@ class Sema;
 
       /// Determine whether reversing parameter order is allowed for operator
       /// Op.
-      bool allowsReversed(OverloadedOperatorKind Op);
+      bool allowsReversed(OverloadedOperatorKind Op) const;
 
       /// Determine whether we should add a rewritten candidate for \p FD with
       /// reversed parameter order.
       /// \param OriginalArgs are the original non reversed arguments.
       bool shouldAddReversed(Sema &S, ArrayRef<Expr *> OriginalArgs,
-                             FunctionDecl *FD);
+                             FunctionDecl *FD) const;
     };
 
   private:
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6af7ef3..1eea813 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2314,10 +2314,14 @@ static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
     if (Ptr.isBaseClass())
       ByteOffset = computePointerOffset(ASTCtx, Ptr.getBase()) -
                    computePointerOffset(ASTCtx, Ptr);
-    else
-      ByteOffset =
-          computePointerOffset(ASTCtx, Ptr) -
-          computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+    else {
+      if (Ptr.inArray())
+        ByteOffset =
+            computePointerOffset(ASTCtx, Ptr) -
+            computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+      else
+        ByteOffset = 0;
+    }
   } else
     ByteOffset = computePointerOffset(ASTCtx, Ptr);
 
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index dc3778b..2b89370 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -537,33 +537,16 @@ WarningsSpecialCaseList::create(const llvm::MemoryBuffer &Input,
 }
 
 void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) {
-  // Drop the default section introduced by special case list, we only support
-  // exact diagnostic group names.
-  // FIXME: We should make this configurable in the parser instead.
-  // FIXME: C++20 can use std::erase_if(Sections, [](Section &sec) { return
-  // sec.SectionStr == "*"; });
-  llvm::erase_if(Sections, [](Section &sec) { return sec.SectionStr == "*"; });
-  // Make sure we iterate sections by their line numbers.
-  std::vector<std::pair<unsigned, const Section *>> LineAndSectionEntry;
-  LineAndSectionEntry.reserve(Sections.size());
-  for (const auto &Entry : Sections) {
-    StringRef DiagName = Entry.SectionStr;
-    // Each section has a matcher with that section's name, attached to that
-    // line.
-    const auto &DiagSectionMatcher = Entry.SectionMatcher;
-    unsigned DiagLine = 0;
-    for (const auto &Glob : DiagSectionMatcher->Globs)
-      if (Glob->Name == DiagName) {
-        DiagLine = Glob->LineNo;
-        break;
-      }
-    LineAndSectionEntry.emplace_back(DiagLine, &Entry);
-  }
-  llvm::sort(LineAndSectionEntry);
   static constexpr auto WarningFlavor = clang::diag::Flavor::WarningOrError;
-  for (const auto &[_, SectionEntry] : LineAndSectionEntry) {
+  for (const auto &SectionEntry : Sections) {
+    StringRef DiagGroup = SectionEntry.SectionStr;
+    if (DiagGroup == "*") {
+      // Drop the default section introduced by special case list, we only
+      // support exact diagnostic group names.
+      // FIXME: We should make this configurable in the parser instead.
+      continue;
+    }
     SmallVector<diag::kind> GroupDiags;
-    StringRef DiagGroup = SectionEntry->SectionStr;
     if (Diags.getDiagnosticIDs()->getDiagnosticsInGroup(
             WarningFlavor, DiagGroup, GroupDiags)) {
       StringRef Suggestion =
@@ -576,7 +559,7 @@ void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) {
     for (diag::kind Diag : GroupDiags)
       // We're intentionally overwriting any previous mappings here to make sure
       // latest one takes precedence.
-      DiagToSection[Diag] = SectionEntry;
+      DiagToSection[Diag] = &SectionEntry;
   }
 }
 
diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
index f7bc1d5..582c255 100644
--- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp
+++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
@@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() {
     SanitizerMask Mask;
 
 #define SANITIZER(NAME, ID)                                                    \
-  if (S.SectionMatcher->match(NAME))                                           \
+  if (S.SectionMatcher.match(NAME))                                            \
     Mask |= SanitizerKind::ID;
 #define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID)
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index 1dee774..be66240 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -113,6 +113,7 @@ public:
                                           CIRGenFunction &cgf) = 0;
 
   virtual void emitRethrow(CIRGenFunction &cgf, bool isNoReturn) = 0;
+  virtual void emitThrow(CIRGenFunction &cgf, const CXXThrowExpr *e) = 0;
 
   virtual mlir::Attribute getAddrOfRTTIDescriptor(mlir::Location loc,
                                                   QualType ty) = 0;
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
index a4ec8cc..30f5607 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
@@ -104,6 +104,7 @@ public:
   bool isNormalCleanup() const { return cleanupBits.isNormalCleanup; }
 
   bool isActive() const { return cleanupBits.isActive; }
+  void setActive(bool isActive) { cleanupBits.isActive = isActive; }
 
   size_t getCleanupSize() const { return cleanupBits.cleanupSize; }
   void *getCleanupBuffer() { return this + 1; }
@@ -138,5 +139,13 @@ inline EHScopeStack::iterator EHScopeStack::begin() const {
   return iterator(startOfData);
 }
 
+inline EHScopeStack::iterator
+EHScopeStack::find(stable_iterator savePoint) const {
+  assert(savePoint.isValid() && "finding invalid savepoint");
+  assert(savePoint.size <= stable_begin().size &&
+         "finding savepoint after pop");
+  return iterator(endOfBuffer - savePoint.size);
+}
+
 } // namespace clang::CIRGen
 #endif // CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H
diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp
index 7fcb39a..6453843 100644
--- a/clang/lib/CIR/CodeGen/CIRGenException.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp
@@ -31,11 +31,36 @@ void CIRGenFunction::emitCXXThrowExpr(const CXXThrowExpr *e) {
     if (throwType->isObjCObjectPointerType()) {
       cgm.errorNYI("emitCXXThrowExpr ObjCObjectPointerType");
       return;
-    } else {
-      cgm.errorNYI("emitCXXThrowExpr with subExpr");
-      return;
     }
-  } else {
-    cgm.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
+
+    cgm.getCXXABI().emitThrow(*this, e);
+    return;
   }
+
+  cgm.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
+}
+
+void CIRGenFunction::emitAnyExprToExn(const Expr *e, Address addr) {
+  // Make sure the exception object is cleaned up if there's an
+  // exception during initialization.
+  assert(!cir::MissingFeatures::ehCleanupScope());
+
+  // __cxa_allocate_exception returns a void*;  we need to cast this
+  // to the appropriate type for the object.
+  mlir::Type ty = convertTypeForMem(e->getType());
+  Address typedAddr = addr.withElementType(builder, ty);
+
+  // From LLVM's codegen:
+  // FIXME: this isn't quite right!  If there's a final unelided call
+  // to a copy constructor, then according to [except.terminate]p1 we
+  // must call std::terminate() if that constructor throws, because
+  // technically that copy occurs after the exception expression is
+  // evaluated but before the exception is caught.  But the best way
+  // to handle that is to teach EmitAggExpr to do the final copy
+  // differently if it can't be elided.
+  emitAnyExprToMem(e, typedAddr, e->getType().getQualifiers(),
+                   /*isInitializer=*/true);
+
+  // Deactivate the cleanup block.
+  assert(!cir::MissingFeatures::ehCleanupScope());
 }
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index e20a4fc..59aa257 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -118,6 +118,9 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
   /// non-packed LLVM struct will give the correct layout.
   bool naturalLayout = true;
 
+  bool split(size_t index, CharUnits hint);
+  std::optional<size_t> splitAt(CharUnits pos);
+
   static mlir::Attribute buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
                                    CharUnits startOffset, CharUnits size,
                                    bool naturalLayout, mlir::Type desiredTy,
@@ -137,6 +140,10 @@ public:
   /// Update or overwrite the bits starting at \p offsetInBits with \p bits.
   bool addBits(llvm::APInt bits, uint64_t offsetInBits, bool allowOverwrite);
 
+  /// Attempt to condense the value starting at \p offset to a constant of type
+  /// \p desiredTy.
+  void condense(CharUnits offset, mlir::Type desiredTy);
+
   /// Produce a constant representing the entire accumulated value, ideally of
   /// the specified type. If \p allowOversized, the constant might be larger
   /// than implied by \p desiredTy (eg, if there is a flexible array member).
@@ -176,6 +183,195 @@ bool ConstantAggregateBuilder::add(mlir::TypedAttr typedAttr, CharUnits offset,
   return false;
 }
 
+bool ConstantAggregateBuilder::addBits(llvm::APInt bits, uint64_t offsetInBits,
+                                       bool allowOverwrite) {
+  const ASTContext &astContext = cgm.getASTContext();
+  const uint64_t charWidth = astContext.getCharWidth();
+  mlir::Type charTy = cgm.getBuilder().getUIntNTy(charWidth);
+
+  // Offset of where we want the first bit to go within the bits of the
+  // current char.
+  unsigned offsetWithinChar = offsetInBits % charWidth;
+
+  // We split bit-fields up into individual bytes. Walk over the bytes and
+  // update them.
+  for (CharUnits offsetInChars =
+           astContext.toCharUnitsFromBits(offsetInBits - offsetWithinChar);
+       /**/; ++offsetInChars) {
+    // Number of bits we want to fill in this char.
+    unsigned wantedBits =
+        std::min((uint64_t)bits.getBitWidth(), charWidth - offsetWithinChar);
+
+    // Get a char containing the bits we want in the right places. The other
+    // bits have unspecified values.
+    llvm::APInt bitsThisChar = bits;
+    if (bitsThisChar.getBitWidth() < charWidth)
+      bitsThisChar = bitsThisChar.zext(charWidth);
+    if (cgm.getDataLayout().isBigEndian()) {
+      // Figure out how much to shift by. We may need to left-shift if we have
+      // less than one byte of Bits left.
+      int shift = bits.getBitWidth() - charWidth + offsetWithinChar;
+      if (shift > 0)
+        bitsThisChar.lshrInPlace(shift);
+      else if (shift < 0)
+        bitsThisChar = bitsThisChar.shl(-shift);
+    } else {
+      bitsThisChar = bitsThisChar.shl(offsetWithinChar);
+    }
+    if (bitsThisChar.getBitWidth() > charWidth)
+      bitsThisChar = bitsThisChar.trunc(charWidth);
+
+    if (wantedBits == charWidth) {
+      // Got a full byte: just add it directly.
+      add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+          allowOverwrite);
+    } else {
+      // Partial byte: update the existing integer if there is one. If we
+      // can't split out a 1-CharUnit range to update, then we can't add
+      // these bits and fail the entire constant emission.
+      std::optional<size_t> firstElemToUpdate = splitAt(offsetInChars);
+      if (!firstElemToUpdate)
+        return false;
+      std::optional<size_t> lastElemToUpdate =
+          splitAt(offsetInChars + CharUnits::One());
+      if (!lastElemToUpdate)
+        return false;
+      assert(*lastElemToUpdate - *firstElemToUpdate < 2 &&
+             "should have at most one element covering one byte");
+
+      // Figure out which bits we want and discard the rest.
+      llvm::APInt updateMask(charWidth, 0);
+      if (cgm.getDataLayout().isBigEndian())
+        updateMask.setBits(charWidth - offsetWithinChar - wantedBits,
+                           charWidth - offsetWithinChar);
+      else
+        updateMask.setBits(offsetWithinChar, offsetWithinChar + wantedBits);
+      bitsThisChar &= updateMask;
+      bool isNull = false;
+      if (*firstElemToUpdate < elements.size()) {
+        auto firstEltToUpdate =
+            mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+        isNull = firstEltToUpdate && firstEltToUpdate.isNullValue();
+      }
+
+      if (*firstElemToUpdate == *lastElemToUpdate || isNull) {
+        // All existing bits are either zero or undef.
+        add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+            /*allowOverwrite*/ true);
+      } else {
+        cir::IntAttr ci =
+            mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+        // In order to perform a partial update, we need the existing bitwise
+        // value, which we can only extract for a constant int.
+        if (!ci)
+          return false;
+        // Because this is a 1-CharUnit range, the constant occupying it must
+        // be exactly one CharUnit wide.
+        assert(ci.getBitWidth() == charWidth && "splitAt failed");
+        assert((!(ci.getValue() & updateMask) || allowOverwrite) &&
+               "unexpectedly overwriting bitfield");
+        bitsThisChar |= (ci.getValue() & ~updateMask);
+        elements[*firstElemToUpdate].element =
+            cir::IntAttr::get(charTy, bitsThisChar);
+      }
+    }
+
+    // Stop if we've added all the bits.
+    if (wantedBits == bits.getBitWidth())
+      break;
+
+    // Remove the consumed bits from Bits.
+    if (!cgm.getDataLayout().isBigEndian())
+      bits.lshrInPlace(wantedBits);
+    bits = bits.trunc(bits.getBitWidth() - wantedBits);
+
+    // The remaining bits go at the start of the following bytes.
+    offsetWithinChar = 0;
+  }
+
+  return true;
+}
+
+/// Returns a position within elements such that all elements
+/// before the returned index end before pos and all elements at or after
+/// the returned index begin at or after pos. Splits elements as necessary
+/// to ensure this. Returns std::nullopt if we find something we can't split.
+std::optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits pos) {
+  if (pos >= size)
+    return elements.size();
+
+  while (true) {
+    // Find the first element that starts after pos.
+    Element *iter =
+        llvm::upper_bound(elements, pos, [](CharUnits pos, const Element &elt) {
+          return pos < elt.offset;
+        });
+
+    if (iter == elements.begin())
+      return 0;
+
+    size_t index = iter - elements.begin() - 1;
+    const Element &elt = elements[index];
+
+    // If we already have an element starting at pos, we're done.
+    if (elt.offset == pos)
+      return index;
+
+    // Check for overlap with the element that starts before pos.
+    CharUnits eltEnd = elt.offset + getSize(elt.element);
+    if (eltEnd <= pos)
+      return index + 1;
+
+    // Try to decompose it into smaller constants.
+    if (!split(index, pos))
+      return std::nullopt;
+  }
+}
+
+/// Split the constant at index, if possible. Return true if we did.
+/// Hint indicates the location at which we'd like to split, but may be
+/// ignored.
+bool ConstantAggregateBuilder::split(size_t index, CharUnits hint) {
+  cgm.errorNYI("split constant at index");
+  return false;
+}
+
+void ConstantAggregateBuilder::condense(CharUnits offset,
+                                        mlir::Type desiredTy) {
+  CharUnits desiredSize = getSize(desiredTy);
+
+  std::optional<size_t> firstElemToReplace = splitAt(offset);
+  if (!firstElemToReplace)
+    return;
+  size_t first = *firstElemToReplace;
+
+  std::optional<size_t> lastElemToReplace = splitAt(offset + desiredSize);
+  if (!lastElemToReplace)
+    return;
+  size_t last = *lastElemToReplace;
+
+  size_t length = last - first;
+  if (length == 0)
+    return;
+
+  if (length == 1 && elements[first].offset == offset &&
+      getSize(elements[first].element) == desiredSize) {
+    cgm.errorNYI("re-wrapping single element records");
+    return;
+  }
+
+  // Build a new constant from the elements in the range.
+  SmallVector<Element> subElems(elements.begin() + first,
+                                elements.begin() + last);
+  mlir::Attribute replacement =
+      buildFrom(cgm, subElems, offset, desiredSize,
+                /*naturalLayout=*/false, desiredTy, false);
+
+  // Replace the range with the condensed constant.
+  Element newElt(mlir::cast<mlir::TypedAttr>(replacement), offset);
+  replace(elements, first, last, {newElt});
+}
+
 mlir::Attribute
 ConstantAggregateBuilder::buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
                                     CharUnits startOffset, CharUnits size,
@@ -301,6 +497,9 @@ private:
   bool appendBytes(CharUnits fieldOffsetInChars, mlir::TypedAttr initCst,
                    bool allowOverwrite = false);
 
+  bool appendBitField(const FieldDecl *field, uint64_t fieldOffset,
+                      cir::IntAttr ci, bool allowOverwrite = false);
+
   bool build(InitListExpr *ile, bool allowOverwrite);
   bool build(const APValue &val, const RecordDecl *rd, bool isPrimaryBase,
              const CXXRecordDecl *vTableClass, CharUnits baseOffset);
@@ -325,6 +524,30 @@ bool ConstRecordBuilder::appendBytes(CharUnits fieldOffsetInChars,
   return builder.add(initCst, startOffset + fieldOffsetInChars, allowOverwrite);
 }
 
+bool ConstRecordBuilder::appendBitField(const FieldDecl *field,
+                                        uint64_t fieldOffset, cir::IntAttr ci,
+                                        bool allowOverwrite) {
+  const CIRGenRecordLayout &rl =
+      cgm.getTypes().getCIRGenRecordLayout(field->getParent());
+  const CIRGenBitFieldInfo &info = rl.getBitFieldInfo(field);
+  llvm::APInt fieldValue = ci.getValue();
+
+  // Promote the size of FieldValue if necessary
+  // FIXME: This should never occur, but currently it can because initializer
+  // constants are cast to bool, and because clang is not enforcing bitfield
+  // width limits.
+  if (info.size > fieldValue.getBitWidth())
+    fieldValue = fieldValue.zext(info.size);
+
+  // Truncate the size of FieldValue to the bit field size.
+  if (info.size < fieldValue.getBitWidth())
+    fieldValue = fieldValue.trunc(info.size);
+
+  return builder.addBits(fieldValue,
+                         cgm.getASTContext().toBits(startOffset) + fieldOffset,
+                         allowOverwrite);
+}
+
 bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
   RecordDecl *rd = ile->getType()
                        ->castAs<clang::RecordType>()
@@ -407,12 +630,14 @@ bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
     } else {
       // Otherwise we have a bitfield.
       if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
-        assert(!cir::MissingFeatures::bitfields());
-        cgm.errorNYI(field->getSourceRange(), "bitfields");
+        if (!appendBitField(field, layout.getFieldOffset(index), constInt,
+                            allowOverwrite))
+          return false;
+      } else {
+        // We are trying to initialize a bitfield with a non-trivial constant,
+        // this must require run-time code.
+        return false;
       }
-      // We are trying to initialize a bitfield with a non-trivial constant,
-      // this must require run-time code.
-      return false;
     }
   }
 
@@ -510,8 +735,16 @@ bool ConstRecordBuilder::build(const APValue &val, const RecordDecl *rd,
       if (field->hasAttr<NoUniqueAddressAttr>())
         allowOverwrite = true;
     } else {
-      assert(!cir::MissingFeatures::bitfields());
-      cgm.errorNYI(field->getSourceRange(), "bitfields");
+      // Otherwise we have a bitfield.
+      if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
+        if (!appendBitField(field, layout.getFieldOffset(index) + offsetBits,
+                            constInt, allowOverwrite))
+          return false;
+      } else {
+        // We are trying to initialize a bitfield with a non-trivial constant,
+        // this must require run-time code.
+        return false;
+      }
     }
   }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index cbc0f4a..d10d058 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1090,6 +1090,8 @@ public:
   /// even if no aggregate location is provided.
   RValue emitAnyExprToTemp(const clang::Expr *e);
 
+  void emitAnyExprToExn(const Expr *e, Address addr);
+
   void emitArrayDestroy(mlir::Value begin, mlir::Value numElements,
                         QualType elementType, CharUnits elementAlign,
                         Destroyer *destroyer);
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index debea8af..0418174 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -70,6 +70,7 @@ public:
                           QualType thisTy) override;
 
   void emitRethrow(CIRGenFunction &cgf, bool isNoReturn) override;
+  void emitThrow(CIRGenFunction &cgf, const CXXThrowExpr *e) override;
 
   bool useThunkForDtorVariant(const CXXDestructorDecl *dtor,
                               CXXDtorType dt) const override {
@@ -1544,6 +1545,59 @@ void CIRGenItaniumCXXABI::emitRethrow(CIRGenFunction &cgf, bool isNoReturn) {
   }
 }
 
+void CIRGenItaniumCXXABI::emitThrow(CIRGenFunction &cgf,
+                                    const CXXThrowExpr *e) {
+  // This differs a bit from LLVM codegen, CIR has native operations for some
+  // cxa functions, and defers allocation size computation, always pass the dtor
+  // symbol, etc. CIRGen also does not use getAllocateExceptionFn / getThrowFn.
+
+  // Now allocate the exception object.
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  QualType clangThrowType = e->getSubExpr()->getType();
+  cir::PointerType throwTy =
+      builder.getPointerTo(cgf.convertType(clangThrowType));
+  uint64_t typeSize =
+      cgf.getContext().getTypeSizeInChars(clangThrowType).getQuantity();
+  mlir::Location subExprLoc = cgf.getLoc(e->getSubExpr()->getSourceRange());
+
+  // Defer computing allocation size to some later lowering pass.
+  mlir::TypedValue<cir::PointerType> exceptionPtr =
+      cir::AllocExceptionOp::create(builder, subExprLoc, throwTy,
+                                    builder.getI64IntegerAttr(typeSize))
+          .getAddr();
+
+  // Build expression and store its result into exceptionPtr.
+  CharUnits exnAlign = cgf.getContext().getExnObjectAlignment();
+  cgf.emitAnyExprToExn(e->getSubExpr(), Address(exceptionPtr, exnAlign));
+
+  // Get the RTTI symbol address.
+  auto typeInfo = mlir::cast<cir::GlobalViewAttr>(
+      cgm.getAddrOfRTTIDescriptor(subExprLoc, clangThrowType,
+                                  /*forEH=*/true));
+  assert(!typeInfo.getIndices() && "expected no indirection");
+
+  // The address of the destructor.
+  //
+  // Note: LLVM codegen already optimizes out the dtor if the
+  // type is a record with trivial dtor (by passing down a
+  // null dtor). In CIR, we forward this info and allow for
+  // Lowering pass to skip passing the trivial function.
+  //
+  if (const RecordType *recordTy = clangThrowType->getAs<RecordType>()) {
+    CXXRecordDecl *rec =
+        cast<CXXRecordDecl>(recordTy->getOriginalDecl()->getDefinition());
+    assert(!cir::MissingFeatures::isTrivialCtorOrDtor());
+    if (!rec->hasTrivialDestructor()) {
+      cgm.errorNYI("emitThrow: non-trivial destructor");
+      return;
+    }
+  }
+
+  // Now throw the exception.
+  mlir::Location loc = cgf.getLoc(e->getSourceRange());
+  insertThrowAndSplit(builder, loc, exceptionPtr, typeInfo.getSymbol());
+}
+
 CIRGenCXXABI *clang::CIRGen::CreateCIRGenItaniumCXXABI(CIRGenModule &cgm) {
   switch (cgm.getASTContext().getCXXABIKind()) {
   case TargetCXXABI::GenericItanium:
diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h
index c87a6ef..66c1f76 100644
--- a/clang/lib/CIR/CodeGen/EHScopeStack.h
+++ b/clang/lib/CIR/CodeGen/EHScopeStack.h
@@ -175,6 +175,10 @@ public:
     return stable_iterator(endOfBuffer - startOfData);
   }
 
+  /// Turn a stable reference to a scope depth into a unstable pointer
+  /// to the EH stack.
+  iterator find(stable_iterator savePoint) const;
+
   /// Create a stable reference to the bottom of the EH stack.
   static stable_iterator stable_end() { return stable_iterator(0); }
 };
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3a3c631..e9649af 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2581,22 +2581,69 @@ void createLLVMFuncOpIfNotExist(mlir::ConversionPatternRewriter &rewriter,
 mlir::LogicalResult CIRToLLVMThrowOpLowering::matchAndRewrite(
     cir::ThrowOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
-  if (op.rethrows()) {
-    auto voidTy = mlir::LLVM::LLVMVoidType::get(getContext());
-    auto funcTy =
-        mlir::LLVM::LLVMFunctionType::get(getContext(), voidTy, {}, false);
+  mlir::Location loc = op.getLoc();
+  auto voidTy = mlir::LLVM::LLVMVoidType::get(getContext());
 
-    auto mlirModule = op->getParentOfType<mlir::ModuleOp>();
-    rewriter.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
+  if (op.rethrows()) {
+    auto funcTy = mlir::LLVM::LLVMFunctionType::get(voidTy, {});
 
+    // Get or create `declare void @__cxa_rethrow()`
     const llvm::StringRef functionName = "__cxa_rethrow";
     createLLVMFuncOpIfNotExist(rewriter, op, functionName, funcTy);
 
-    rewriter.setInsertionPointAfter(op.getOperation());
-    rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
-        op, mlir::TypeRange{}, functionName, mlir::ValueRange{});
+    auto cxaRethrow = mlir::LLVM::CallOp::create(
+        rewriter, loc, mlir::TypeRange{}, functionName);
+
+    rewriter.replaceOp(op, cxaRethrow);
+    return mlir::success();
   }
 
+  auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+  auto fnTy = mlir::LLVM::LLVMFunctionType::get(
+      voidTy, {llvmPtrTy, llvmPtrTy, llvmPtrTy});
+
+  // Get or create `declare void @__cxa_throw(ptr, ptr, ptr)`
+  const llvm::StringRef fnName = "__cxa_throw";
+  createLLVMFuncOpIfNotExist(rewriter, op, fnName, fnTy);
+
+  mlir::Value typeInfo = mlir::LLVM::AddressOfOp::create(
+      rewriter, loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()),
+      adaptor.getTypeInfoAttr());
+
+  mlir::Value dtor;
+  if (op.getDtor()) {
+    dtor = mlir::LLVM::AddressOfOp::create(rewriter, loc, llvmPtrTy,
+                                           adaptor.getDtorAttr());
+  } else {
+    dtor = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+  }
+
+  auto cxaThrowCall = mlir::LLVM::CallOp::create(
+      rewriter, loc, mlir::TypeRange{}, fnName,
+      mlir::ValueRange{adaptor.getExceptionPtr(), typeInfo, dtor});
+
+  rewriter.replaceOp(op, cxaThrowCall);
+  return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite(
+    cir::AllocExceptionOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  // Get or create `declare ptr @__cxa_allocate_exception(i64)`
+  StringRef fnName = "__cxa_allocate_exception";
+  auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+  auto int64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
+  auto fnTy = mlir::LLVM::LLVMFunctionType::get(llvmPtrTy, {int64Ty});
+
+  createLLVMFuncOpIfNotExist(rewriter, op, fnName, fnTy);
+  auto exceptionSize = mlir::LLVM::ConstantOp::create(rewriter, op.getLoc(),
+                                                      adaptor.getSizeAttr());
+
+  auto allocaExceptionCall = mlir::LLVM::CallOp::create(
+      rewriter, op.getLoc(), mlir::TypeRange{llvmPtrTy}, fnName,
+      mlir::ValueRange{exceptionSize});
+
+  rewriter.replaceOp(op, allocaExceptionCall);
   return mlir::success();
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index 810d6aa..3a7ee54 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -163,12 +163,14 @@ public:
                           SourceLocation Loc) override;
 
   // Currently unsupported on the device.
+  using CGOpenMPRuntime::emitMessageClause;
   llvm::Value *emitMessageClause(CodeGenFunction &CGF, const Expr *Message,
                                  SourceLocation Loc) override;
 
   // Currently unsupported on the device.
-  virtual llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity,
-                                          SourceLocation Loc) override;
+  using CGOpenMPRuntime::emitSeverityClause;
+  llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity,
+                                  SourceLocation Loc) override;
 
   /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp
index e19daa9..72a42a6 100644
--- a/clang/lib/Driver/Action.cpp
+++ b/clang/lib/Driver/Action.cpp
@@ -43,7 +43,7 @@ const char *Action::getClassName(ActionClass AC) {
   case OffloadUnbundlingJobClass:
     return "clang-offload-unbundler";
   case OffloadPackagerJobClass:
-    return "clang-offload-packager";
+    return "llvm-offload-binary";
   case LinkerWrapperJobClass:
     return "clang-linker-wrapper";
   case StaticLibJobClass:
diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h
index c227895..9adad5c 100644
--- a/clang/lib/Driver/ToolChains/Clang.h
+++ b/clang/lib/Driver/ToolChains/Clang.h
@@ -163,7 +163,7 @@ public:
 class LLVM_LIBRARY_VISIBILITY OffloadPackager final : public Tool {
 public:
   OffloadPackager(const ToolChain &TC)
-      : Tool("Offload::Packager", "clang-offload-packager", TC) {}
+      : Tool("Offload::Packager", "llvm-offload-binary", TC) {}
 
   bool hasIntegratedCPP() const override { return false; }
   void ConstructJob(Compilation &C, const JobAction &JA,
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index d951ba0..142cc07 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -112,7 +112,7 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ph(
                        e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
 }
 
-static __inline __m512h __DEFAULT_FN_ATTRS512
+static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_pch(_Float16 _Complex __h) {
   return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h));
 }
@@ -193,17 +193,17 @@ _mm512_castsi512_ph(__m512i __a) {
   return (__m512h)__a;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_castph256_ph128(__m256h __a) {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS512
+static __inline__ __m128h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castph512_ph128(__m512h __a) {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS512
+static __inline__ __m256h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castph512_ph256(__m512h __a) {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                  12, 13, 14, 15);
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index c0bcc08..5b2b3f0 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -34,11 +34,13 @@
 #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
 #endif
 
-static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtsh_h(__m128h __a) {
   return __a[0];
 }
 
-static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtsh_h(__m256h __a) {
   return __a[0];
 }
 
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 999e302c..f4df63c 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -280,6 +280,11 @@ public:
     if (T->getDepth() >= TemplateArgs.getNumLevels())
       return true;
 
+    // There might not be a corresponding template argument before substituting
+    // into the parameter mapping, e.g. a sizeof... expression.
+    if (!TemplateArgs.hasTemplateArgument(T->getDepth(), T->getIndex()))
+      return true;
+
     TemplateArgument Arg = TemplateArgs(T->getDepth(), T->getIndex());
 
     if (T->isParameterPack() && SemaRef.ArgPackSubstIndex) {
@@ -300,6 +305,12 @@ public:
     if (!NTTP)
       return TraverseDecl(D);
 
+    if (NTTP->getDepth() >= TemplateArgs.getNumLevels())
+      return true;
+
+    if (!TemplateArgs.hasTemplateArgument(NTTP->getDepth(), NTTP->getIndex()))
+      return true;
+
     TemplateArgument Arg = TemplateArgs(NTTP->getDepth(), NTTP->getPosition());
     if (NTTP->isParameterPack() && SemaRef.ArgPackSubstIndex) {
       assert(Arg.getKind() == TemplateArgument::Pack &&
@@ -326,17 +337,25 @@ public:
     return inherited::TraverseDecl(D);
   }
 
+  bool TraverseCallExpr(CallExpr *CE) {
+    inherited::TraverseStmt(CE->getCallee());
+
+    for (Expr *Arg : CE->arguments())
+      inherited::TraverseStmt(Arg);
+
+    return true;
+  }
+
   bool TraverseTypeLoc(TypeLoc TL, bool TraverseQualifier = true) {
     // We don't care about TypeLocs. So traverse Types instead.
-    return TraverseType(TL.getType(), TraverseQualifier);
+    return TraverseType(TL.getType().getCanonicalType(), TraverseQualifier);
   }
 
   bool TraverseTagType(const TagType *T, bool TraverseQualifier) {
     // T's parent can be dependent while T doesn't have any template arguments.
     // We should have already traversed its qualifier.
     // FIXME: Add an assert to catch cases where we failed to profile the
-    // concept. assert(!T->isDependentType() && "We missed a case in profiling
-    // concepts!");
+    // concept.
     return true;
   }
 
@@ -701,7 +720,6 @@ ExprResult ConstraintSatisfactionChecker::Evaluate(
 
   if (auto Iter = S.UnsubstitutedConstraintSatisfactionCache.find(ID);
       Iter != S.UnsubstitutedConstraintSatisfactionCache.end()) {
-
     auto &Cached = Iter->second.Satisfaction;
     Satisfaction.ContainsErrors = Cached.ContainsErrors;
     Satisfaction.IsSatisfied = Cached.IsSatisfied;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 5657dfe..8d32ef6 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1087,14 +1087,14 @@ static bool shouldAddReversedEqEq(Sema &S, SourceLocation OpLoc,
 }
 
 bool OverloadCandidateSet::OperatorRewriteInfo::allowsReversed(
-    OverloadedOperatorKind Op) {
+    OverloadedOperatorKind Op) const {
   if (!AllowRewrittenCandidates)
     return false;
   return Op == OO_EqualEqual || Op == OO_Spaceship;
 }
 
 bool OverloadCandidateSet::OperatorRewriteInfo::shouldAddReversed(
-    Sema &S, ArrayRef<Expr *> OriginalArgs, FunctionDecl *FD) {
+    Sema &S, ArrayRef<Expr *> OriginalArgs, FunctionDecl *FD) const {
   auto Op = FD->getOverloadedOperator();
   if (!allowsReversed(Op))
     return false;
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 83d79b43..70baab5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -3812,6 +3812,15 @@ bool MallocChecker::mayFreeAnyEscapedMemoryOrIsModeledExplicitly(
     return true;
   }
 
+  // Protobuf function declared in `generated_message_util.h` that takes
+  // ownership of the second argument. As the first and third arguments are
+  // allocation arenas and won't be tracked by this checker, there is no reason
+  // to set `EscapingSymbol`. (Also, this is an implementation detail of
+  // Protobuf, so it's better to be a bit more permissive.)
+  if (FName == "GetOwnedMessageInternal") {
+    return true;
+  }
+
   // Handle cases where we know a buffer's /address/ can escape.
   // Note that the above checks handle some special cases where we know that
   // even though the address escapes, it's still our responsibility to free the
diff --git a/clang/test/AST/ByteCode/builtin-object-size.cpp b/clang/test/AST/ByteCode/builtin-object-size.cpp
index 6f4ef54..e4433ea 100644
--- a/clang/test/AST/ByteCode/builtin-object-size.cpp
+++ b/clang/test/AST/ByteCode/builtin-object-size.cpp
@@ -17,7 +17,8 @@ static_assert(__builtin_object_size(&arrf, 0) == (sizeof(float)*2), "");
 static_assert(__builtin_object_size(&arrf[1], 0) == sizeof(float), "");
 static_assert(__builtin_object_size(&arrf[2], 0) == 0, "");
 
-
+constexpr struct { int a; int b; } F{};
+static_assert(__builtin_object_size(&F.a, 3) == sizeof(int));
 
 struct S {
   int a;
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-for-protobuf.h b/clang/test/Analysis/Inputs/system-header-simulator-for-protobuf.h
new file mode 100644
index 0000000..cb12b55
--- /dev/null
+++ b/clang/test/Analysis/Inputs/system-header-simulator-for-protobuf.h
@@ -0,0 +1,18 @@
+// Like the compiler, the static analyzer treats some functions differently if
+// they come from a system header -- for example, it is assumed that system
+// functions do not arbitrarily free() their parameters, and that some bugs
+// found in system headers cannot be fixed by the user and should be
+// suppressed.
+#pragma clang system_header
+
+class Arena;
+class MessageLite {
+  int SomeArbitraryField;
+};
+
+// Originally declared in generated_message_util.h
+MessageLite *GetOwnedMessageInternal(Arena *, MessageLite *, Arena *);
+
+// Not a real protobuf function -- just introduced to validate that this file
+// is handled as a system header.
+void SomeOtherFunction(MessageLite *);
diff --git a/clang/test/Analysis/NewDeleteLeaks.cpp b/clang/test/Analysis/NewDeleteLeaks.cpp
index b2bad7e..d9c4b77 100644
--- a/clang/test/Analysis/NewDeleteLeaks.cpp
+++ b/clang/test/Analysis/NewDeleteLeaks.cpp
@@ -13,6 +13,8 @@
 // RUN:     unix.DynamicMemoryModeling:AddNoOwnershipChangeNotes=true
 
 #include "Inputs/system-header-simulator-for-malloc.h"
+// For the tests in namespace protobuf_leak:
+#include "Inputs/system-header-simulator-for-protobuf.h"
 
 //===----------------------------------------------------------------------===//
 // Report for which we expect NoOwnershipChangeVisitor to add a new note.
@@ -218,3 +220,34 @@ void caller() {
   (void)n;
 } // no-warning: No potential memory leak here, because that's been already reported.
 } // namespace symbol_reaper_lifetime
+
+// Check that we do not report false positives in automatically generated
+// protobuf code that passes dynamically allocated memory to a certain function
+// named GetOwnedMessageInternal.
+namespace protobuf_leak {
+Arena *some_arena, *some_submessage_arena;
+
+MessageLite *protobuf_leak() {
+  MessageLite *p = new MessageLite(); // Real protobuf code instantiates a
+                                      // subclass of MessageLite, but that's
+                                      // not relevant for the bug.
+  MessageLite *q = GetOwnedMessageInternal(some_arena, p, some_submessage_arena);
+  return q;
+  // No leak at end of function -- the pointer escapes in GetOwnedMessageInternal.
+}
+
+void validate_system_header() {
+  // The case protobuf_leak would also pass if GetOwnedMessageInternal wasn't
+  // declared in a system header. This test verifies that another function
+  // declared in the same header behaves differently (doesn't escape memory) to
+  // demonstrate that GetOwnedMessageInternal is indeed explicitly recognized
+  // by the analyzer.
+
+  // expected-note@+1 {{Memory is allocated}}
+  MessageLite *p = new MessageLite();
+  SomeOtherFunction(p);
+  // expected-warning@+2 {{Potential leak of memory pointed to by 'p'}}
+  // expected-note@+1    {{Potential leak of memory pointed to by 'p'}}
+}
+
+} // namespace protobuf_leak
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp
index 73c05b3..083d438 100644
--- a/clang/test/CIR/CodeGen/complex.cpp
+++ b/clang/test/CIR/CodeGen/complex.cpp
@@ -1359,3 +1359,49 @@ void complex_type_argument() {
 // OGCG: store float %[[A_IMAG]], ptr %[[ARG_IMAG_PTR]], align 4
 // OGCG: %[[TMP_ARG:.*]] = load <2 x float>, ptr %[[ARG_ADDR]], align 4
 // OGCG: call void @_Z22complex_type_parameterCf(<2 x float> noundef %[[TMP_ARG]])
+
+void real_on_scalar_bool() {
+  bool a;
+  bool b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.bool -> !cir.bool
+// CIR: cir.store{{.*}} %[[A_REAL]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[B_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[TMP_A:.*]] = load i8, ptr %[[A_ADDR]], align 1
+// LLVM: %[[TMP_A_I1:.*]] = trunc i8 %[[TMP_A]] to i1
+// LLVM: %[[TMP_A_I8:.*]] = zext i1 %[[TMP_A_I1]] to i8
+// LLVM: store i8 %[[TMP_A_I8]], ptr %[[B_ADDR]], align 1
+
+// OGCG: %[[A_ADDR:.*]] = alloca i8, align 1
+// OGCG: %[[B_ADDR:.*]] = alloca i8, align 1
+// OGCG: %[[TMP_A:.*]] = load i8, ptr %[[A_ADDR]], align 1
+// OGCG: %[[TMP_A_I1:.*]] = trunc i8 %[[TMP_A]] to i1
+// OGCG: %[[TMP_A_I8:.*]] = zext i1 %[[TMP_A_I1]] to i8
+// OGCG: store i8 %[[TMP_A_I8]], ptr %[[B_ADDR]], align 1
+
+void imag_on_scalar_bool() {
+  bool a;
+  bool b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.bool -> !cir.bool
+// CIR: cir.store{{.*}} %[[A_IMAG]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[B_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[TMP_A:.*]] = load i8, ptr %[[A_ADDR]], align 1
+// LLVM: %[[TMP_A_I1:.*]] = trunc i8 %[[TMP_A]] to i1
+// LLVM: store i8 0, ptr %[[B_ADDR]], align 1
+
+// OGCG: %[[A_ADDR:.*]] = alloca i8, align 1
+// OGCG: %[[B_ADDR:.*]] = alloca i8, align 1
+// OGCG: store i8 0, ptr %[[B_ADDR]], align 1
diff --git a/clang/test/CIR/CodeGen/constant-inits.cpp b/clang/test/CIR/CodeGen/constant-inits.cpp
index c9153c91..d5a7bb9 100644
--- a/clang/test/CIR/CodeGen/constant-inits.cpp
+++ b/clang/test/CIR/CodeGen/constant-inits.cpp
@@ -30,6 +30,41 @@ struct simple {
     int a, b;
 };
 
+// Byte-aligned bitfields
+struct byte_aligned_bitfields {
+    unsigned int a : 8;
+    unsigned int b : 8;
+    unsigned int c : 16;
+};
+
+struct signed_byte_aligned_bitfields {
+    int x : 8;
+    int y : 8;
+};
+
+struct single_byte_bitfield {
+    unsigned char a : 8;
+};
+
+// Partial bitfields (sub-byte)
+struct partial_bitfields {
+    unsigned int a : 3;
+    unsigned int b : 5;
+    unsigned int c : 8;
+};
+
+struct signed_partial_bitfields {
+    int x : 4;
+    int y : 4;
+};
+
+struct mixed_partial_bitfields {
+    unsigned char a : 1;
+    unsigned char b : 1;
+    unsigned char c : 1;
+    unsigned char d : 5;
+};
+
 void function() {
     constexpr static empty e;
 
@@ -54,8 +89,22 @@ void function() {
     constexpr static simple simple_array[] {
         s, {1111, 2222}, s
     };
+
+    // Byte-aligned bitfield tests
+    constexpr static byte_aligned_bitfields ba_bf1 = {0xFF, 0xAA, 0x1234};
+    constexpr static signed_byte_aligned_bitfields ba_bf2 = {-1, 127};
+    constexpr static single_byte_bitfield ba_bf3 = {42};
+
+    // Partial bitfield tests
+    constexpr static partial_bitfields p_bf1 = {1, 2, 3};
+    constexpr static signed_partial_bitfields p_bf2 = {-1, 7};
+    constexpr static mixed_partial_bitfields p_bf3 = {1, 0, 1, 15};
 }
 
+// Anonymous struct type definitions for bitfields
+// CIR-DAG: !rec_anon_struct = !cir.record<struct  {!u8i, !u8i, !u8i, !u8i}>
+// CIR-DAG: !rec_anon_struct1 = !cir.record<struct  {!u8i, !u8i, !cir.array<!u8i x 2>}>
+
 // CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1e = #cir.zero : !rec_empty
 // CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE1s = #cir.const_record<{#cir.int<0> : !s32i, #cir.int<-1> : !s32i}> : !rec_simple
 // CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE2p1 = #cir.const_record<{#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.const_array<[#cir.int<99> : !s8i, #cir.int<88> : !s8i, #cir.int<77> : !s8i]> : !cir.array<!s8i x 3>, #cir.int<40> : !s32i}> : !rec_Point
@@ -83,6 +132,33 @@ void function() {
 // CIR-DAG-SAME:   #cir.zero : !rec_packed_and_aligned
 // CIR-DAG-SAME: ]> : !cir.array<!rec_packed_and_aligned x 2>
 
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf1 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<255> : !u8i,
+// CIR-DAG-SAME:   #cir.int<170> : !u8i,
+// CIR-DAG-SAME:   #cir.int<52> : !u8i,
+// CIR-DAG-SAME:   #cir.int<18> : !u8i
+// CIR-DAG-SAME: }> : !rec_anon_struct
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf2 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<255> : !u8i,
+// CIR-DAG-SAME:   #cir.int<127> : !u8i,
+// CIR-DAG-SAME:   #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>
+// CIR-DAG-SAME: }> : !rec_anon_struct1
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE6ba_bf3 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<42> : !u8i
+// CIR-DAG-SAME: }> : !rec_single_byte_bitfield
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf1 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<17> : !u8i,
+// CIR-DAG-SAME:   #cir.int<3> : !u8i,
+// CIR-DAG-SAME:   #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>
+// CIR-DAG-SAME: }> : !rec_anon_struct1
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf2 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<127> : !u8i,
+// CIR-DAG-SAME:   #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>
+// CIR-DAG-SAME: }> : !rec_signed_partial_bitfields
+// CIR-DAG: cir.global "private" internal dso_local @_ZZ8functionvE5p_bf3 = #cir.const_record<{
+// CIR-DAG-SAME:   #cir.int<125> : !u8i
+// CIR-DAG-SAME: }> : !rec_mixed_partial_bitfields
+
 // CIR-LABEL: cir.func dso_local @_Z8functionv()
 // CIR:   cir.return
 
@@ -96,6 +172,12 @@ void function() {
 // LLVM-DAG: @_ZZ8functionvE3paa = internal global %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>
 // LLVM-DAG: @_ZZ8functionvE5array = internal global [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }]
 // LLVM-DAG: @_ZZ8functionvE9paa_array = internal global [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 0 }>, %struct.packed_and_aligned zeroinitializer]
+// LLVM-DAG: @_ZZ8functionvE6ba_bf1 = internal global { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 }
+// LLVM-DAG: @_ZZ8functionvE6ba_bf2 = internal global { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE6ba_bf3 = internal global %struct.single_byte_bitfield { i8 42 }
+// LLVM-DAG: @_ZZ8functionvE5p_bf1 = internal global { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE5p_bf2 = internal global %struct.signed_partial_bitfields { i8 127, [3 x i8] zeroinitializer }
+// LLVM-DAG: @_ZZ8functionvE5p_bf3 = internal global %struct.mixed_partial_bitfields { i8 125 }
 
 // LLVM-LABEL: define{{.*}} void @_Z8functionv
 // LLVM:   ret void
@@ -110,6 +192,12 @@ void function() {
 // OGCG-DAG: @_ZZ8functionvE3paa = internal constant %struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 undef }>
 // OGCG-DAG: @_ZZ8functionvE5array = internal constant [2 x %struct.Point] [%struct.Point { i32 123, i32 456, [3 x i8] c"\0B\16!", i32 789 }, %struct.Point { i32 10, i32 20, [3 x i8] zeroinitializer, i32 40 }]
 // OGCG-DAG: @_ZZ8functionvE9paa_array = internal constant [2 x %struct.packed_and_aligned] [%struct.packed_and_aligned <{ i16 1, i8 2, float 3.000000e+00, i8 undef }>, %struct.packed_and_aligned <{ i16 0, i8 0, float 0.000000e+00, i8 undef }>]
+// OGCG-DAG: @_ZZ8functionvE6ba_bf1 = internal constant { i8, i8, i8, i8 } { i8 -1, i8 -86, i8 52, i8 18 }
+// OGCG-DAG: @_ZZ8functionvE6ba_bf2 = internal constant { i8, i8, [2 x i8] } { i8 -1, i8 127, [2 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE6ba_bf3 = internal constant %struct.single_byte_bitfield { i8 42 }
+// OGCG-DAG: @_ZZ8functionvE5p_bf1 = internal constant { i8, i8, [2 x i8] } { i8 17, i8 3, [2 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE5p_bf2 = internal constant %struct.signed_partial_bitfields { i8 127, [3 x i8] undef }
+// OGCG-DAG: @_ZZ8functionvE5p_bf3 = internal constant %struct.mixed_partial_bitfields { i8 125 }
 
 // OGCG-LABEL: define{{.*}} void @_Z8functionv
 // OGCG:   ret void
diff --git a/clang/test/CIR/CodeGen/throws.cpp b/clang/test/CIR/CodeGen/throws.cpp
index 0122f30..ff6aa62 100644
--- a/clang/test/CIR/CodeGen/throws.cpp
+++ b/clang/test/CIR/CodeGen/throws.cpp
@@ -5,7 +5,7 @@
 // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -emit-llvm %s -o %t.ll
 // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
 
-void foo() {
+void rethrow() {
   throw;
 }
 
@@ -18,7 +18,7 @@ void foo() {
 // OGCG: call void @__cxa_rethrow()
 // OGCG: unreachable
 
-int foo1(int a, int b) {
+int rethrow_from_block(int a, int b) {
   if (b == 0)
     throw;
   return a / b;
@@ -83,3 +83,43 @@ int foo1(int a, int b) {
 // OGCG:  %[[TMP_B:.*]] = load i32, ptr %[[B_ADDR]], align 4
 // OGCG:  %[[DIV_A_B:.*]] = sdiv i32 %[[TMP_A]], %[[TMP_B]]
 // OGCG:  ret i32 %[[DIV_A_B]]
+
+void throw_scalar() { 
+  throw 1;
+}
+
+// CIR: %[[EXCEPTION_ADDR:.*]] = cir.alloc.exception 4 -> !cir.ptr<!s32i>
+// CIR: %[[EXCEPTION_VALUE:.*]] = cir.const #cir.int<1> : !s32i
+// CIR: cir.store{{.*}} %[[EXCEPTION_VALUE]], %[[EXCEPTION_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.throw %[[EXCEPTION_ADDR]] : !cir.ptr<!s32i>, @_ZTIi
+// CIR: cir.unreachable
+
+// LLVM: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 4)
+// LLVM: store i32 1, ptr %[[EXCEPTION_ADDR]], align 16
+// LLVM: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIi, ptr null)
+// LLVM: unreachable
+
+// OGCG: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 4)
+// OGCG: store i32 1, ptr %[[EXCEPTION_ADDR]], align 16
+// OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIi, ptr null)
+// OGCG: unreachable
+
+void paren_expr() { (throw 0, 1 + 2); }
+
+// CIR:   %[[EXCEPTION_ADDR:.*]] = cir.alloc.exception 4 -> !cir.ptr<!s32i>
+// CIR:   %[[EXCEPTION_VALUE:.*]] = cir.const #cir.int<0> : !s32i
+// CIR:   cir.store{{.*}} %[[EXCEPTION_VALUE]], %[[EXCEPTION_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR:   cir.throw %[[EXCEPTION_ADDR]] : !cir.ptr<!s32i>, @_ZTIi
+// CIR:   cir.unreachable
+// CIR: ^bb1:
+// CIR:   %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:   %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i
+// CIR:   %[[ADD:.*]] = cir.binop(add, %[[CONST_1]], %[[CONST_2]]) nsw : !s32i
+
+// LLVM: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 4)
+// LLVM: store i32 0, ptr %[[EXCEPTION_ADDR]], align 16
+// LLVM: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIi, ptr null)
+
+// OGCG: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 4)
+// OGCG: store i32 0, ptr %[[EXCEPTION_ADDR]], align 16
+// OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIi, ptr null)
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index e9f4f83..bcb6bd6 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -103,7 +103,6 @@ list(APPEND CLANG_TEST_DEPS
   clang-linker-wrapper
   clang-nvlink-wrapper
   clang-offload-bundler
-  clang-offload-packager
   clang-sycl-linker
   diagtool
   hmaptool
@@ -173,6 +172,7 @@ if( NOT CLANG_BUILT_STANDALONE )
     llvm-strip
     llvm-symbolizer
     llvm-windres
+    llvm-offload-binary
     obj2yaml
     opt
     split-file
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index dbf89b3..2befff0 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -117,6 +117,7 @@ __m512h test_mm512_set1_pch(_Float16 _Complex h) {
   // CHECK: bitcast <16 x float>{{.*}} to <32 x half>
   return _mm512_set1_pch(h);
 }
+TEST_CONSTEXPR(match_m512h(_mm512_set1_pch(1.0), 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0));
 
 __m512h test_mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
                           _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
@@ -340,18 +341,21 @@ __m128h test_mm256_castph256_ph128(__m256h __a) {
   // CHECK: shufflevector <16 x half> %{{.*}}, <16 x half> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_castph256_ph128(__a);
 }
+TEST_CONSTEXPR(match_m128h(_mm256_castph256_ph128((__m256h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0}), -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0));
 
 __m128h test_mm512_castph512_ph128(__m512h __a) {
   // CHECK-LABEL: test_mm512_castph512_ph128
   // CHECK: shufflevector <32 x half> %{{.*}}, <32 x half> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_castph512_ph128(__a);
 }
+TEST_CONSTEXPR(match_m128h(_mm512_castph512_ph128((__m512h){0.0, -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0, -17.0, 18.0, -19.0, 20.0, -21.0, 22.0, -23.0, 24.0, -25.0, 26.0, -27.0, 28.0, -29.0, 30.0, -31.0}), 0.0, -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0));
 
 __m256h test_mm512_castph512_ph256(__m512h __a) {
   // CHECK-LABEL: test_mm512_castph512_ph256
   // CHECK: shufflevector <32 x half> %{{.*}}, <32 x half> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_castph512_ph256(__a);
 }
+TEST_CONSTEXPR(match_m256h(_mm512_castph512_ph256((__m512h){-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0, -17.0, 18.0, -19.0, 20.0, -21.0, 22.0, -23.0, 24.0, -25.0, 26.0, -27.0, 28.0, -29.0, 30.0, -31.0, 32.0}), -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0, 14.0, -15.0, -16.0));
 
 __m256h test_mm256_castph128_ph256(__m128h __a) {
   // CHECK-LABEL: test_mm256_castph128_ph256
diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
index f1865aa..68d0984 100644
--- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -17,12 +17,14 @@ _Float16 test_mm_cvtsh_h(__m128h __A) {
   // CHECK: extractelement <8 x half> %{{.*}}, i32 0
   return _mm_cvtsh_h(__A);
 }
+TEST_CONSTEXPR(_mm_cvtsh_h((__m128h){-8.0, 7.0, -6.0, 5.0, -4.0, 3.0, -2.0, 1.0}) == -8.0);
 
 _Float16 test_mm256_cvtsh_h(__m256h __A) {
   // CHECK-LABEL: test_mm256_cvtsh_h
   // CHECK: extractelement <16 x half> %{{.*}}, i32 0
   return _mm256_cvtsh_h(__A);
 }
+TEST_CONSTEXPR(_mm256_cvtsh_h((__m256h){-32.0, 31.0, -30.0, 29.0, -28.0, 27.0, -26.0, 25.0, -24.0, 23.0, -22.0, 21.0, -20.0, 19.0, -18.0, 17.0}) == -32.0);
 
 __m128h test_mm_set_sh(_Float16 __h) {
   // CHECK-LABEL: test_mm_set_sh
diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
index 26d17e7..3dd8a36 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c
@@ -25,12 +25,6 @@
 // CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 15 } }
 // CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 15 } }
 // CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 15 } }
-// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 15 } }
-// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 15 } }
-// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 15 } }
-// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 15 } }
-// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 15 } }
-// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 15 } }
 // CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1700:.*]] = {{.*}}, i32 1700, i32 15 } }
 // CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1800:.*]] = {{.*}}, i32 1800, i32 20 } }
 
@@ -225,172 +219,6 @@ char *nullptr_allones_BAD(void) {
 
 //------------------------------------------------------------------------------
 
-char *one_var(unsigned long offset) {
-  // CHECK:                           define{{.*}} ptr @one_var(i64 noundef %[[OFFSET:.*]])
-  // CHECK-NEXT:                      [[ENTRY:.*]]:
-  // CHECK-NEXT:                        %[[OFFSET_ADDR:.*]] = alloca i64, align 8
-  // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr inttoptr (i64 1 to ptr), i64 %[[OFFSET_RELOADED]]
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 1, %[[COMPUTED_OFFSET]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[OTHER_IS_NOT_NULL:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[OTHER_IS_NOT_NULL]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[OR_OV]], true, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], 1, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
-  // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_900]], i64 1, i64 %[[COMPUTED_GEP]])
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_900]], i64 1, i64 %[[COMPUTED_GEP]])
-  // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
-  // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
-  // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr %[[ADD_PTR]]
-  static char *const base = (char *)1;
-#line 900
-  return base + offset;
-}
-
-char *one_zero(void) {
-  // CHECK:                             define{{.*}} ptr @one_zero()
-  // CHECK-NEXT:                        [[ENTRY:.*]]:
-  // CHECK-NEXT:                          ret ptr inttoptr (i64 1 to ptr)
-  static char *const base = (char *)1;
-  static const unsigned long offset = 0;
-#line 1000
-  return base + offset;
-}
-
-char *one_one_OK(void) {
-  // CHECK:                           define{{.*}} ptr @one_one_OK()
-  // CHECK-NEXT:                      [[ENTRY:.*]]:
-  // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1), 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
-  // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1100]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1) to i64), i64 1), i64 1))
-  // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
-  // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
-  // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 1)
-  static char *const base = (char *)1;
-  static const unsigned long offset = 1;
-#line 1100
-  return base + offset;
-}
-
-char *one_allones_BAD(void) {
-  // CHECK:                           define{{.*}} ptr @one_allones_BAD()
-  // CHECK-NEXT:                      [[ENTRY:.*]]:
-  // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1), 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
-  // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1200]], i64 1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1) to i64), i64 1), i64 1))
-  // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
-  // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
-  // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 1 to ptr), i64 -1)
-  static char *const base = (char *)1;
-  static const unsigned long offset = -1;
-#line 1200
-  return base + offset;
-}
-
-//------------------------------------------------------------------------------
-
-char *allones_var(unsigned long offset) {
-  // CHECK:                           define{{.*}} ptr @allones_var(i64 noundef %[[OFFSET:.*]])
-  // CHECK-NEXT:                      [[ENTRY:.*]]:
-  // CHECK-NEXT:                        %[[OFFSET_ADDR:.*]] = alloca i64, align 8
-  // CHECK-NEXT:                        store i64 %[[OFFSET]], ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[OFFSET_RELOADED:.*]] = load i64, ptr %[[OFFSET_ADDR]], align 8
-  // CHECK-NEXT:                        %[[ADD_PTR:.*]] = getelementptr inbounds nuw i8, ptr inttoptr (i64 -1 to ptr), i64 %[[OFFSET_RELOADED]]
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[OR_OV:.+]] = or i1 %[[COMPUTED_OFFSET_OVERFLOWED]], false, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP:.*]] = add i64 -1, %[[COMPUTED_OFFSET]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[OTHER_IS_NOT_NULL:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[OTHER_IS_NOT_NULL]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[OR_OV]], true, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], -1, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
-  // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1300]], i64 -1, i64 %[[COMPUTED_GEP]])
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1300]], i64 -1, i64 %[[COMPUTED_GEP]])
-  // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
-  // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
-  // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr %[[ADD_PTR]]
-  static char *const base = (char *)-1;
-#line 1300
-  return base + offset;
-}
-
-char *allones_zero_OK(void) {
-  // CHECK:                             define{{.*}} ptr @allones_zero_OK()
-  // CHECK-NEXT:                        [[ENTRY:.*]]:
-  // CHECK-NEXT:                          ret ptr inttoptr (i64 -1 to ptr)
-  static char *const base = (char *)-1;
-  static const unsigned long offset = 0;
-#line 1400
-  return base + offset;
-}
-
-char *allones_one_BAD(void) {
-  // CHECK: define{{.*}} ptr @allones_one_BAD()
-  // CHECK-NEXT: [[ENTRY:.*]]:
-  // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1), 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
-  // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1500]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1) to i64), i64 -1), i64 -1))
-  // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
-  // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
-  // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 1)
-  static char *const base = (char *)-1;
-  static const unsigned long offset = 1;
-#line 1500
-  return base + offset;
-}
-
-char *allones_allones_OK(void) {
-  // CHECK: define{{.*}} ptr @allones_allones_OK()
-  // CHECK-NEXT: [[ENTRY:.*]]:
-  // CHECK-SANITIZE-NEXT:               %[[CMP1:.*]] = icmp ne ptr inttoptr (i64 -1 to ptr), null, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[CMP2:.*]] = icmp ne i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1), 0, !nosanitize
-  // CHECK-SANITIZE-NEXT:               %[[COND:.*]] = icmp eq i1 %[[CMP1]], %[[CMP2]], !nosanitize
-  // CHECK-SANITIZE-NEXT:               br i1 %[[COND]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize
-  // CHECK-SANITIZE:                  [[HANDLER_POINTER_OVERFLOW]]:
-  // CHECK-SANITIZE-NORECOVER-NEXT:     call void @__ubsan_handle_pointer_overflow_abort(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1))
-  // CHECK-SANITIZE-RECOVER-NEXT:       call void @__ubsan_handle_pointer_overflow(ptr @[[LINE_1600]], i64 -1, i64 add (i64 sub (i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1) to i64), i64 -1), i64 -1))
-  // CHECK-SANITIZE-TRAP-NEXT:          call void @llvm.ubsantrap(i8 19){{.*}}, !nosanitize
-  // CHECK-SANITIZE-UNREACHABLE-NEXT:   unreachable, !nosanitize
-  // CHECK-SANITIZE:                  [[CONT]]:
-  // CHECK-NEXT:                        ret ptr getelementptr inbounds nuw (i8, ptr inttoptr (i64 -1 to ptr), i64 -1)
-  static char *const base = (char *)-1;
-  static const unsigned long offset = -1;
-#line 1600
-  return base + offset;
-}
-
 // C++ does not allow void* arithmetic even as a GNU extension. Replace void*
 // with char* in that case to keep test expectations the same.
 #ifdef __cplusplus
diff --git a/clang/test/CodeGenCXX/builtin-invoke.cpp b/clang/test/CodeGenCXX/builtin-invoke.cpp
index af66dfd4d..0f84f83 100644
--- a/clang/test/CodeGenCXX/builtin-invoke.cpp
+++ b/clang/test/CodeGenCXX/builtin-invoke.cpp
@@ -55,7 +55,7 @@ extern "C" void call_memptr(std::reference_wrapper<Callable> wrapper) {
   // CHECK-NEXT:   br label %memptr.end
   // CHECK-EMPTY:
   // CHECK-NEXT: memptr.end:
-  // CHECK-NEXT:   %2 = phi ptr [ %memptr.virtualfn, %memptr.virtual ], [ @_ZN8Callable4funcEv, %memptr.nonvirtual ]
+  // CHECK-NEXT:   %2 = phi ptr [ %memptr.virtualfn, %memptr.virtual ], [ inttoptr (i64 ptrtoint (ptr @_ZN8Callable4funcEv to i64) to ptr), %memptr.nonvirtual ]
   // CHECK-NEXT:   call void %2(ptr noundef nonnull align 1 dereferenceable(1) %0)
   // CHECK-NEXT:   ret void
 }
diff --git a/clang/test/Driver/amdgpu-openmp-sanitize-options.c b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
index 985eca1..914e018 100644
--- a/clang/test/Driver/amdgpu-openmp-sanitize-options.c
+++ b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
@@ -59,6 +59,6 @@
 // GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-mlink-bitcode-file" "[^"]*asanrtl.bc".* "-mlink-bitcode-file" "[^"]*ockl.bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-fsanitize=address".* "-x" "c".*}}
 // NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-x" "c".*}}
 
-// SAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
+// SAN: {{"[^"]*llvm-offload-binary[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
 // SAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "--offload-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
 // SAN: {{"[^"]*clang-linker-wrapper[^"]*".* "--host-triple=x86_64-unknown-linux-gnu".* "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c
index 1091e6e..5e73e2d 100644
--- a/clang/test/Driver/amdgpu-openmp-toolchain.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -22,7 +22,7 @@
 // CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa:gfx906)" {5}, ir
 // CHECK-PHASES: 7: backend, {6}, ir, (device-openmp, gfx906)
 // CHECK-PHASES: 8: offload, "device-openmp (amdgcn-amd-amdhsa:gfx906)" {7}, ir
-// CHECK-PHASES: 9: clang-offload-packager, {8}, image, (device-openmp)
+// CHECK-PHASES: 9: llvm-offload-binary, {8}, image, (device-openmp)
 // CHECK-PHASES: 10: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {9}, ir
 // CHECK-PHASES: 11: backend, {10}, assembler, (host-openmp)
 // CHECK-PHASES: 12: assembler, {11}, object, (host-openmp)
@@ -64,7 +64,7 @@
 // RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
 // CHECK-TARGET-ID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a" "-target-feature" "-sramecc" "-target-feature" "+xnack"
-// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp
+// CHECK-TARGET-ID: llvm-offload-binary{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp
 
 // RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \
 // RUN:   -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID-ERROR
diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu
index 220a320..db7d29e 100644
--- a/clang/test/Driver/cuda-phases.cu
+++ b/clang/test/Driver/cuda-phases.cu
@@ -235,7 +235,7 @@
 // NEW-DRIVER-RDC-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
 // NEW-DRIVER-RDC-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
 // NEW-DRIVER-RDC-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
-// NEW-DRIVER-RDC-NEXT: 15: clang-offload-packager, {8, 14}, image, (device-cuda)
+// NEW-DRIVER-RDC-NEXT: 15: llvm-offload-binary, {8, 14}, image, (device-cuda)
 // NEW-DRIVER-RDC-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir
 // NEW-DRIVER-RDC-NEXT: 17: backend, {16}, assembler, (host-cuda)
 // NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda)
@@ -312,7 +312,7 @@
 // LTO-NEXT: 10: compiler, {9}, ir, (device-cuda, sm_70)
 // LTO-NEXT: 11: backend, {10}, lto-bc, (device-cuda, sm_70)
 // LTO-NEXT: 12: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {11}, lto-bc
-// LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-cuda)
+// LTO-NEXT: 13: llvm-offload-binary, {7, 12}, image, (device-cuda)
 // LTO-NEXT: 14: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {13}, ir
 // LTO-NEXT: 15: backend, {14}, assembler, (host-cuda)
 // LTO-NEXT: 16: assembler, {15}, object, (host-cuda)
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 6bac97a..13f682f 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -40,7 +40,7 @@
 // OLD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
 // NEW-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, ir
 // OLDN-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
-// NEW-DAG: [[P10:[0-9]+]]: clang-offload-packager, {[[P9]]}, image, (device-[[T]])
+// NEW-DAG: [[P10:[0-9]+]]: llvm-offload-binary, {[[P9]]}, image, (device-[[T]])
 // OLDR-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
 
 // OLDN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
@@ -665,7 +665,7 @@
 // LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
 // LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
 // LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
-// LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-hip)
+// LTO-NEXT: 13: llvm-offload-binary, {7, 12}, image, (device-hip)
 // LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
 // LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
 // LTO-NEXT: 16: assembler, {15}, object, (host-hip)
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip
index dc8f0a9..a94299e 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -97,7 +97,7 @@
 // OLD-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900"
 // OLD-SAME: "-input={{.*}}" "-input=[[IMG_DEV_A_803]]" "-input=[[IMG_DEV_A_900]]" "-output=[[BUNDLE_A:.*hipfb]]"
 
-// NEW: [[PACKAGER:".*clang-offload-packager"]] "-o" "[[PACKAGE_A:.*.out]]"
+// NEW: [[PACKAGER:".*llvm-offload-binary"]] "-o" "[[PACKAGE_A:.*.out]]"
 // NEW-SAME: "--image=file=[[OBJ_DEV_A_803]],triple=amdgcn-amd-amdhsa,arch=gfx803,kind=hip"
 // NEW-SAME: "--image=file=[[OBJ_DEV_A_900]],triple=amdgcn-amd-amdhsa,arch=gfx900,kind=hip"
 
@@ -169,7 +169,7 @@
 // OLD-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900"
 // OLD-SAME: "-input={{.*}}" "-input=[[IMG_DEV_B_803]]" "-input=[[IMG_DEV_B_900]]" "-output=[[BUNDLE_B:.*hipfb]]"
 
-// NEW: [[PACKAGER:".*clang-offload-packager"]] "-o" "[[PACKAGE_B:.*.out]]"
+// NEW: [[PACKAGER:".*llvm-offload-binary"]] "-o" "[[PACKAGE_B:.*.out]]"
 // NEW-SAME: "--image=file=[[OBJ_DEV_B_803]],triple=amdgcn-amd-amdhsa,arch=gfx803,kind=hip"
 // NEW-SAME: "--image=file=[[OBJ_DEV_B_900]],triple=amdgcn-amd-amdhsa,arch=gfx900,kind=hip"
 
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 3147617..b932712 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -5,7 +5,7 @@
 
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
 
-// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
+// RUN: llvm-offload-binary -o %t.out --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
 // RUN:   -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
@@ -42,7 +42,7 @@
 // OPENMP-NEXT:   ret void
 // OPENMP-NEXT: }
 
-// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
+// RUN: llvm-offload-binary -o %t.out --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
 // RUN:   -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
@@ -153,7 +153,7 @@
 // CUDA-NEXT:   ret void
 // CUDA-NEXT: }
 
-// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: llvm-offload-binary -o %t.out --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
 // RUN:   -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
@@ -265,7 +265,7 @@
 // HIP-NEXT:   ret void
 // HIP-NEXT: }
 
-// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
+// RUN: llvm-offload-binary -o %t.out --image=file=%t.elf.o,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
 // RUN:   -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index 1c0fb96..52a961d 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -12,7 +12,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc
 // RUN: %clang -cc1 %s -triple spirv64-unknown-unknown -emit-llvm-bc -o %t.spirv.bc
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -24,7 +24,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // NVPTX-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.nvptx64.sm_70.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -33,7 +33,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // NVPTX-LINK-DEBUG: clang{{.*}} --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}-g
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -42,7 +42,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
 // RUN:   --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -51,7 +51,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -flto {{.*}}-save-temps
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
@@ -59,7 +59,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // SPIRV-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.spirv64..img. --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch=
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -77,12 +77,12 @@ __attribute__((visibility("protected"), used)) int x;
 // HOST-LINK: ld.lld{{.*}}-a -b -c {{.*}}.o -o a.out
 // HOST-LINK-NOT: ld.lld{{.*}}-abc
 
-// RUN: clang-offload-packager -o %t-lib.out \
+// RUN: llvm-offload-binary -o %t-lib.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
 // RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-obj.o -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
@@ -91,7 +91,7 @@ __attribute__((visibility("protected"), used)) int x;
 // STATIC-LIBRARY: clang{{.*}} -march=sm_70
 // STATIC-LIBRARY-NOT: clang{{.*}} -march=sm_50
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52
@@ -105,7 +105,7 @@ __attribute__((visibility("protected"), used)) int x;
 // CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]]
 // CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_80 \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_75 \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -119,7 +119,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \
 // RUN:   --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
@@ -133,7 +133,7 @@ __attribute__((visibility("protected"), used)) int x;
 // HIP: clang{{.*}} -o [[IMG_GFX908:.+]] -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908
 // HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -compression-level=6 -targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx90a,hip-amdgcn-amd-amdhsa--gfx908 -input={{/dev/null|NUL}} -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
@@ -152,7 +152,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // MISSING-LIBRARY: error: unable to find library -ldummy
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -161,7 +161,7 @@ __attribute__((visibility("protected"), used)) int x;
 
 // CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-windows-msvc -emit-obj -o %t.o -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-windows-msvc --dry-run \
@@ -169,14 +169,14 @@ __attribute__((visibility("protected"), used)) int x;
 
 // COFF: "/usr/bin/lld-link" {{.*}}.o -libpath:./ -out:a.exe {{.*}}openmp.image.wrapper{{.*}}
 
-// RUN: clang-offload-packager -o %t-lib.out \
+// RUN: llvm-offload-binary -o %t-lib.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
 // RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t-on.out \
+// RUN: llvm-offload-binary -o %t-on.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack+
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-on.o -fembed-offload-object=%t-on.out
-// RUN: clang-offload-packager -o %t-off.out \
+// RUN: llvm-offload-binary -o %t-off.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a:xnack-
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t-off.o -fembed-offload-object=%t-off.out
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
@@ -185,14 +185,14 @@ __attribute__((visibility("protected"), used)) int x;
 // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
-// RUN: clang-offload-packager -o %t-lib.out \
+// RUN: llvm-offload-binary -o %t-lib.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out
 // RUN: llvm-ar rcs %t.a %t.o
-// RUN: clang-offload-packager -o %t1.out \
+// RUN: llvm-offload-binary -o %t1.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t1.o -fembed-offload-object=%t1.out
-// RUN: clang-offload-packager -o %t2.out \
+// RUN: llvm-offload-binary -o %t2.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t2.o -fembed-offload-object=%t2.out
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
@@ -201,7 +201,7 @@ __attribute__((visibility("protected"), used)) int x;
 // ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 // ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -213,7 +213,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RELOCATABLE-LINK: /usr/bin/ld.lld{{.*}}-r
 // RELOCATABLE-LINK: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
 // RUN:   --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -227,7 +227,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RELOCATABLE-LINK-HIP: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading
 // RELOCATABLE-LINK-HIP: --rename-section llvm_offload_entries
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_89 \
 // RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_89
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
@@ -247,7 +247,7 @@ __attribute__((visibility("protected"), used)) int x;
 // OVERRIDE-NOT: clang
 // OVERRIDE: /usr/bin/ld
 
-// RUN: clang-offload-packager -o %t.out \
+// RUN: llvm-offload-binary -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
diff --git a/clang/test/Driver/offload-packager.c b/clang/test/Driver/offload-packager.c
index fb5f100..adf2565 100644
--- a/clang/test/Driver/offload-packager.c
+++ b/clang/test/Driver/offload-packager.c
@@ -7,26 +7,26 @@
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t/elf.o
 
 // Check that we can extract files from the packaged binary.
-// RUN: clang-offload-packager -o %t/package.out \
+// RUN: llvm-offload-binary -o %t/package.out \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_80 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90c 
-// RUN: clang-offload-packager %t/package.out \
+// RUN: llvm-offload-binary %t/package.out \
 // RUN:   --image=file=%t/sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t/gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: diff %t/sm_70.o %t/elf.o
 // RUN: diff %t/gfx908.o %t/elf.o
 
 // Check that we generate a new name if one is not given
-// RUN: clang-offload-packager -o %t/package \
+// RUN: llvm-offload-binary -o %t/package \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_80 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \
 // RUN:   --image=file=%t/elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90c 
-// RUN: clang-offload-packager %t/package --image=kind=openmp
+// RUN: llvm-offload-binary %t/package --image=kind=openmp
 // RUN: diff *-nvptx64-nvidia-cuda-sm_70.0.o %t/elf.o; rm *-nvptx64-nvidia-cuda-sm_70.0.o
 // RUN: diff *-nvptx64-nvidia-cuda-sm_80.1.o %t/elf.o; rm *-nvptx64-nvidia-cuda-sm_80.1.o
 // RUN: diff *-amdgcn-amd-amdhsa-gfx908.2.o %t/elf.o; rm *-amdgcn-amd-amdhsa-gfx908.2.o
@@ -34,33 +34,33 @@
 // RUN: not diff *-amdgcn-amd-amdhsa-gfx90c.4.o %t/elf.o
 
 // Check that we can extract from an ELF object file
-// RUN: clang-offload-packager -o %t/package.out \
+// RUN: llvm-offload-binary -o %t/package.out \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t/package.o -fembed-offload-object=%t/package.out
-// RUN: clang-offload-packager %t/package.out \
+// RUN: llvm-offload-binary %t/package.out \
 // RUN:   --image=file=%t/sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t/gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: diff %t/sm_70.o %t/elf.o
 // RUN: diff %t/gfx908.o %t/elf.o
 
 // Check that we can extract from a bitcode file
-// RUN: clang-offload-packager -o %t/package.out \
+// RUN: llvm-offload-binary -o %t/package.out \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -o %t/package.bc -fembed-offload-object=%t/package.out
-// RUN: clang-offload-packager %t/package.out \
+// RUN: llvm-offload-binary %t/package.out \
 // RUN:   --image=file=%t/sm_70.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%t/gfx908.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908
 // RUN: diff %t/sm_70.o %t/elf.o
 // RUN: diff %t/gfx908.o %t/elf.o
 
 // Check that we can extract from an archive file to an archive file.
-// RUN: clang-offload-packager -o %t/package.out \
+// RUN: llvm-offload-binary -o %t/package.out \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%t/elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t/package.o -fembed-offload-object=%t/package.out
 // RUN: llvm-ar rcs %t/package.a %t/package.o
-// RUN: clang-offload-packager %t/package.a --archive --image=file=%t/gfx908.a,arch=gfx908
+// RUN: llvm-offload-binary %t/package.a --archive --image=file=%t/gfx908.a,arch=gfx908
 // RUN: llvm-ar t %t/gfx908.a 2>&1 | FileCheck %s
 // CHECK: {{.*}}.o
diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c
index 77f4cfb..edce14e 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -242,7 +242,7 @@
 // CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp, sm_52)
 // CHECK-PHASES: 8: assembler, {7}, object, (device-openmp, sm_52)
 // CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda:sm_52)" {8}, object
-// CHECK-PHASES: 10: clang-offload-packager, {9}, image
+// CHECK-PHASES: 10: llvm-offload-binary, {9}, image
 // CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {10}, ir
 // CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
 // CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
@@ -346,13 +346,13 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 -nogpulib \
 // RUN:     -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-FEATURES %s
 
-// CHECK-LTO-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}
+// CHECK-LTO-FEATURES: llvm-offload-binary{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}
 
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp --offload-arch=sm_52 -nogpulib \
 // RUN:     -Xopenmp-target=nvptx64-nvidia-cuda --cuda-feature=+ptx64 -foffload-lto %s 2>&1 \
 // RUN:    | FileCheck --check-prefix=CHECK-SET-FEATURES %s
 
-// CHECK-SET-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx64
+// CHECK-SET-FEATURES: llvm-offload-binary{{.*}}--image={{.*}}feature=+ptx64
 
 //
 // Check that `-Xarch_host` works for OpenMP offloading.
diff --git a/clang/test/Driver/openmp-offload-jit.c b/clang/test/Driver/openmp-offload-jit.c
index b3566f0..6ced5c1 100644
--- a/clang/test/Driver/openmp-offload-jit.c
+++ b/clang/test/Driver/openmp-offload-jit.c
@@ -25,7 +25,7 @@
 // PHASES-JIT-NEXT: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp ([[TARGET:.+]])" {5}, ir
 // PHASES-JIT-NEXT: 7: backend, {6}, lto-bc, (device-openmp, {{.*}})
 // PHASES-JIT-NEXT: 8: offload, "device-openmp ([[TARGET]])" {7}, lto-bc
-// PHASES-JIT-NEXT: 9: clang-offload-packager, {8}, image, (device-openmp)
+// PHASES-JIT-NEXT: 9: llvm-offload-binary, {8}, image, (device-openmp)
 // PHASES-JIT-NEXT: 10: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {9}, ir
 // PHASES-JIT-NEXT: 11: backend, {10}, assembler, (host-openmp)
 // PHASES-JIT-NEXT: 12: assembler, {11}, object, (host-openmp)
diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c
index 64d45f9..fce1b88 100644
--- a/clang/test/Driver/openmp-offload.c
+++ b/clang/test/Driver/openmp-offload.c
@@ -103,7 +103,7 @@
 // CHK-PHASES-NEXT: 7: backend, {6}, assembler, (device-openmp)
 // CHK-PHASES-NEXT: 8: assembler, {7}, object, (device-openmp)
 // CHK-PHASES-NEXT: 9: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {8}, object
-// CHK-PHASES-NEXT: 10: clang-offload-packager, {9}, image, (device-openmp)
+// CHK-PHASES-NEXT: 10: llvm-offload-binary, {9}, image, (device-openmp)
 // CHK-PHASES-NEXT: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {2}, "device-openmp (powerpc64-ibm-linux-gnu)" {10}, ir
 // CHK-PHASES-NEXT: 12: backend, {11}, assembler, (host-openmp)
 // CHK-PHASES-NEXT: 13: assembler, {12}, object, (host-openmp)
@@ -132,7 +132,7 @@
 // CHK-PHASES-FILES-NEXT: 15: backend, {14}, assembler, (device-openmp)
 // CHK-PHASES-FILES-NEXT: 16: assembler, {15}, object, (device-openmp)
 // CHK-PHASES-FILES-NEXT: 17: offload, "device-openmp (x86_64-pc-linux-gnu)" {16}, object
-// CHK-PHASES-FILES-NEXT: 18: clang-offload-packager, {10, 17}, image, (device-openmp)
+// CHK-PHASES-FILES-NEXT: 18: llvm-offload-binary, {10, 17}, image, (device-openmp)
 // CHK-PHASES-FILES-NEXT: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir
 // CHK-PHASES-FILES-NEXT: 20: backend, {19}, assembler, (host-openmp)
 // CHK-PHASES-FILES-NEXT: 21: assembler, {20}, object, (host-openmp)
@@ -153,7 +153,7 @@
 // CHK-PHASES-FILES-NEXT: 36: backend, {35}, assembler, (device-openmp)
 // CHK-PHASES-FILES-NEXT: 37: assembler, {36}, object, (device-openmp)
 // CHK-PHASES-FILES-NEXT: 38: offload, "device-openmp (x86_64-pc-linux-gnu)" {37}, object
-// CHK-PHASES-FILES-NEXT: 39: clang-offload-packager, {31, 38}, image, (device-openmp)
+// CHK-PHASES-FILES-NEXT: 39: llvm-offload-binary, {31, 38}, image, (device-openmp)
 // CHK-PHASES-FILES-NEXT: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {24}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, ir
 // CHK-PHASES-FILES-NEXT: 41: backend, {40}, assembler, (host-openmp)
 // CHK-PHASES-FILES-NEXT: 42: assembler, {41}, object, (host-openmp)
diff --git a/clang/test/Driver/spirv-openmp-toolchain.c b/clang/test/Driver/spirv-openmp-toolchain.c
index 1542f50..6bf8984 100644
--- a/clang/test/Driver/spirv-openmp-toolchain.c
+++ b/clang/test/Driver/spirv-openmp-toolchain.c
@@ -21,7 +21,7 @@
 // CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp)
 // CHECK-PHASES: 8: assembler, {7}, object, (device-openmp)
 // CHECK-PHASES: 9: offload, "device-openmp (spirv64-intel)" {8}, object
-// CHECK-PHASES: 10: clang-offload-packager, {9}, image, (device-openmp)
+// CHECK-PHASES: 10: llvm-offload-binary, {9}, image, (device-openmp)
 // CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {10}, ir
 // CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp)
 // CHECK-PHASES: 13: assembler, {12}, object, (host-openmp)
diff --git a/clang/test/Driver/sycl-offload-jit.cpp b/clang/test/Driver/sycl-offload-jit.cpp
index e040f4d..72c2390 100644
--- a/clang/test/Driver/sycl-offload-jit.cpp
+++ b/clang/test/Driver/sycl-offload-jit.cpp
@@ -13,21 +13,21 @@
 // CHK-PHASES-NEXT: 5: compiler, {4}, ir, (device-sycl)
 // CHK-PHASES-NEXT: 6: backend, {5}, ir, (device-sycl)
 // CHK-PHASES-NEXT: 7: offload, "device-sycl (spirv64-unknown-unknown)" {6}, ir
-// CHK-PHASES-NEXT: 8: clang-offload-packager, {7}, image, (device-sycl)
+// CHK-PHASES-NEXT: 8: llvm-offload-binary, {7}, image, (device-sycl)
 // CHK-PHASES-NEXT: 9: offload, "host-sycl (x86_64{{.*}})" {2}, "device-sycl (x86_64{{.*}})" {8}, ir
 // CHK-PHASES-NEXT: 10: backend, {9}, assembler, (host-sycl)
 // CHK-PHASES-NEXT: 11: assembler, {10}, object, (host-sycl)
 // CHK-PHASES-NEXT: 12: clang-linker-wrapper, {11}, image, (host-sycl)
 
 /// Check expected default values for device compilation when using -fsycl as
-/// well as clang-offload-packager inputs.
+/// well as llvm-offload-binary inputs.
 // RUN: %clang -### -fsycl -c --target=x86_64-unknown-linux-gnu %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-DEVICE-TRIPLE %s
 // CHK-DEVICE-TRIPLE: "-cc1"{{.*}} "-triple" "spirv64-unknown-unknown"
 // CHK-DEVICE-TRIPLE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHK-DEVICE-TRIPLE-SAME: "-fsycl-is-device"
 // CHK-DEVICE-TRIPLE-SAME: "-O2"
-// CHK-DEVICE-TRIPLE: clang-offload-packager{{.*}} "--image=file={{.*}}.bc,triple=spirv64-unknown-unknown,arch=generic,kind=sycl"
+// CHK-DEVICE-TRIPLE: llvm-offload-binary{{.*}} "--image=file={{.*}}.bc,triple=spirv64-unknown-unknown,arch=generic,kind=sycl"
 
 /// Check -fsycl-is-device is passed when compiling for the device.
 /// Check -fsycl-is-host is passed when compiling for host.
diff --git a/clang/test/SemaCXX/GH161671.cpp b/clang/test/SemaCXX/GH161671.cpp
new file mode 100644
index 0000000..de09e54
--- /dev/null
+++ b/clang/test/SemaCXX/GH161671.cpp
@@ -0,0 +1,339 @@
+// RUN: %clang_cc1 -std=c++20 -w %s
+// RUN: %clang_cc1 -std=c++2c -w %s
+// expected-no-diagnostics
+
+namespace std {
+template <typename _Tp, _Tp __v> struct integral_constant {
+  static constexpr _Tp value = __v;
+  using value_type = _Tp;
+};
+template <bool __v> using __bool_constant = integral_constant<bool, __v>;
+template <typename> struct is_integral : integral_constant<bool, true> {};
+template <typename> struct is_signed : integral_constant<bool, false> {};
+template <typename _Tp, typename _Up = _Tp> _Up __declval(int);
+template <typename _Tp> auto declval() -> decltype(__declval<_Tp>(0));
+template <typename> struct make_unsigned {
+  using type = int;
+};
+template <typename _Tp> struct decay {
+  using type = _Tp;
+};
+template <int, typename _Iftrue, typename> struct conditional {
+  using type = _Iftrue;
+};
+} // namespace std
+namespace meta {
+template <template <typename...> class> struct quote;
+template <template <typename> class C, typename... Ts>
+concept valid = requires { typename C<Ts...>; };
+template <typename T>
+concept trait = requires { typename T; };
+template <typename T>
+concept invocable = requires { typename quote<T::template invoke>; };
+template <typename T>
+concept integral = requires { T::value; };
+template <trait T> using _t = T::type;
+template <integral T> constexpr T::value_type _v = T::value;
+template <bool B> using bool_ = std::integral_constant<bool, B>;
+template <invocable Fn, typename... Args>
+using invoke = Fn::template invoke<Args...>;
+template <typename> struct id;
+namespace detail {
+template <template <typename> class, typename...> struct defer_;
+template <template <typename> class C, typename... Ts>
+  requires valid<C, Ts...>
+struct defer_<C, Ts...> {
+  using type = C<Ts...>;
+};
+} // namespace detail
+template <template <typename> class C, typename... Ts>
+struct defer : detail::defer_<C, Ts...> {};
+template <template <typename...> class C> struct quote {
+  template <typename... Ts> using invoke = _t<defer<C, Ts...>>;
+};
+namespace detail {
+template <int> struct _cond {
+  template <typename Then, typename> using invoke = Then;
+};
+template <> struct _cond<false>;
+} // namespace detail
+template <bool If, typename Then, typename Else>
+using conditional_t = detail::_cond<If>::template invoke<Then, Else>;
+namespace detail {
+template <typename...> struct _if_;
+template <typename If, typename Then, typename Else>
+struct _if_<If, Then, Else> : std::conditional<_v<If>, Then, Else> {};
+} // namespace detail
+template <bool If, typename... Args>
+using if_c = _t<detail::_if_<bool_<If>, Args...>>;
+} // namespace meta
+template <bool> void requires_();
+template <typename A, typename B>
+concept same_as = __is_same(B, A);
+namespace ranges {
+template <typename> struct view_closure;
+template <typename T> using decay_t = meta::_t<std::decay<T>>;
+enum cardinality { unknown };
+template <cardinality> struct basic_view {};
+} // namespace ranges
+namespace std {
+template <typename> struct vector {};
+} // namespace std
+namespace ranges {
+struct {
+  template <typename F, typename... Args>
+  auto operator()(F f, Args... args) -> decltype(f(args...));
+} invoke;
+template <typename Fun, typename... Args>
+using invoke_result_t =
+    decltype(invoke(std::declval<Fun>(), std::declval<Args>()...));
+namespace detail {
+struct with_difference_type_;
+template <typename T> using iter_value_t_ = T ::value_type;
+} // namespace detail
+template <typename R> using iter_value_t = detail::iter_value_t_<R>;
+namespace detail {
+template <typename I>
+using iter_size_t =
+    meta::_t<meta::conditional_t<std::is_integral<I>::value,
+                                 std::make_unsigned<I>, meta::id<I>>>;
+template <typename D>
+concept signed_integer_like_impl_concept_ =
+    std::integral_constant<bool, -D()>::value;
+template <typename D>
+concept signed_integer_like_ = signed_integer_like_impl_concept_<D>;
+} // namespace detail
+template <typename S, typename I>
+concept sized_sentinel_for_requires_ =
+    requires(S s, I i) { requires_<same_as<I, decltype(i - s)>>; };
+template <typename S, typename I>
+concept sized_sentinel_for = sized_sentinel_for_requires_<S, I>;
+struct range_access {
+  template <typename Rng>
+  static auto begin_cursor(Rng rng) -> decltype(rng.begin_cursor());
+  template <typename Cur, typename O>
+  static auto distance_to(Cur pos, O other) -> decltype(pos.distance_to(other));
+};
+namespace detail {
+template <typename S, typename C>
+concept sized_sentinel_for_cursor_requires_ = requires(S s, C c) {
+  requires_<signed_integer_like_<decltype(range_access::distance_to(c, s))>>;
+};
+template <typename S, typename C>
+concept sized_sentinel_for_cursor = sized_sentinel_for_cursor_requires_<S, C>;
+struct iterator_associated_types_base_ {
+  typedef range_access value_type;
+};
+template <typename>
+using iterator_associated_types_base = iterator_associated_types_base_;
+} // namespace detail
+template <typename>
+struct basic_iterator : detail::iterator_associated_types_base<int> {};
+template <typename Cur2, typename Cur>
+  requires detail::sized_sentinel_for_cursor<Cur2, Cur>
+void operator-(basic_iterator<Cur2>, basic_iterator<Cur>);
+namespace _begin_ {
+template <typename T>
+concept has_member_begin_requires_ = requires(T t) { t; };
+template <typename T>
+concept has_member_begin = has_member_begin_requires_<T>;
+struct _member_result_ {
+  template <typename R>
+  using invoke = decltype(static_cast<R (*)()>(nullptr)().begin());
+};
+struct _non_member_result_;
+struct fn {
+  template <typename R>
+  using _result_t =
+      meta::invoke<meta::conditional_t<has_member_begin<R>, _member_result_,
+                                       _non_member_result_>,
+                   R>;
+  template <typename R> _result_t<R> operator()(R);
+};
+} // namespace _begin_
+_begin_::fn begin;
+namespace _end_ {
+template <typename>
+concept has_member_end_requires_ = requires { begin; };
+template <typename T>
+concept has_member_end = has_member_end_requires_<T>;
+struct _member_result_ {
+  template <typename R>
+  using invoke = decltype(static_cast<R (*)()>(nullptr)().end());
+};
+struct _non_member_result_;
+struct fn {
+  template <typename R>
+  using _result_t =
+      meta::invoke<meta::conditional_t<has_member_end<R>, _member_result_,
+                                       _non_member_result_>,
+                   R>;
+  template <typename R> _result_t<R> operator()(R);
+};
+} // namespace _end_
+_end_::fn end;
+template <typename Rng>
+using iterator_t = decltype(begin(static_cast<Rng (*)()>(nullptr)()));
+template <typename Rng>
+using sentinel_t = decltype(end(static_cast<Rng (*)()>(nullptr)()));
+template <typename T>
+concept has_member_size_requires_ = requires(T t) { t.size(); };
+template <typename T>
+concept has_member_size = has_member_size_requires_<T>;
+struct _other_result_;
+struct _member_result_ {
+  template <typename> using invoke = decltype(0);
+  template <typename R>
+  using _result_t = meta::invoke<
+      meta::conditional_t<has_member_size<R>, _member_result_, _other_result_>,
+      R>;
+  template <typename R> _result_t<R> operator()(R r) { r.size(); }
+} size;
+template <typename Rng> using range_value_t = iter_value_t<iterator_t<Rng>>;
+namespace detail {
+template <cardinality Card>
+std::integral_constant<cardinality, Card> test_cardinality(basic_view<Card> *);
+}
+template <typename Rng>
+struct range_cardinality
+    : meta::conditional_t<__is_same(Rng, Rng),
+                          decltype(detail::test_cardinality(
+                              static_cast<Rng *>(nullptr))),
+                          Rng> {};
+template <typename T>
+concept sized_range_requires_ = requires(T t) { size(t); };
+template <typename T>
+concept sized_range = sized_range_requires_<T>;
+namespace detail {
+template <int> struct dependent_ {
+  template <typename T> using invoke = T;
+};
+} // namespace detail
+template <typename Derived, cardinality Cardinality>
+struct view_interface : basic_view<Cardinality> {
+  template <bool B> using D = meta::invoke<detail::dependent_<B>, Derived>;
+  Derived derived();
+  template <bool True = true>
+    requires sized_sentinel_for<sentinel_t<D<True>>, iterator_t<D<True>>>
+  detail::iter_size_t<iterator_t<D<True>>> size() {
+    derived().end() - derived().begin();
+  }
+};
+struct {
+  template <typename Fun> view_closure<Fun> operator()(Fun);
+} make_view_closure;
+struct view_closure_base {
+  template <typename Rng, typename ViewFn>
+  friend auto operator|(Rng rng, ViewFn vw) {
+    return vw(rng);
+  }
+};
+template <typename ViewFn> struct view_closure : view_closure_base, ViewFn {};
+namespace detail {
+template <typename Derived>
+using begin_cursor_t =
+    decay_t<decltype(range_access::begin_cursor(std::declval<Derived>()))>;
+template <typename Derived>
+using facade_iterator_t = basic_iterator<begin_cursor_t<Derived>>;
+template <typename Derived>
+using facade_sentinel_t =
+    meta::if_c<same_as<Derived, Derived>, facade_iterator_t<Derived>, Derived>;
+} // namespace detail
+template <typename Derived, cardinality Cardinality>
+struct view_facade : view_interface<Derived, Cardinality> {
+  template <typename D = Derived> auto begin() -> detail::facade_iterator_t<D>;
+  template <typename D = Derived> auto end() -> detail::facade_sentinel_t<D>;
+};
+template <typename Derived, cardinality Cardinality>
+struct view_adaptor : view_facade<Derived, Cardinality> {
+  auto begin_cursor() -> decltype(0);
+};
+namespace detail {
+template <typename...> struct bind_back_fn_;
+template <typename Fn, typename Arg> struct bind_back_fn_<Fn, Arg> {
+  template <typename... CallArgs>
+  invoke_result_t<Fn, CallArgs..., Arg> operator()(CallArgs...);
+};
+template <typename Fn, typename... Args>
+using bind_back_fn = bind_back_fn_<Fn, Args...>;
+} // namespace detail
+struct {
+  template <typename Fn, typename Arg1>
+  detail::bind_back_fn<Fn, Arg1> operator()(Fn, Arg1);
+} bind_back;
+namespace detail {
+struct to_container {
+  template <typename> struct fn;
+  template <typename, typename> struct closure;
+};
+template <typename, typename, typename R>
+concept to_container_reserve = sized_range<R>;
+template <typename MetaFn, typename Rng>
+using container_t = meta::invoke<MetaFn, Rng>;
+struct to_container_closure_base {
+  template <typename Rng, typename MetaFn, typename Fn>
+  friend auto operator|(Rng rng, to_container::closure<MetaFn, Fn> fn) {
+    return fn(rng);
+  }
+};
+template <typename, typename Fn>
+struct to_container::closure : to_container_closure_base, Fn {};
+template <typename MetaFn> struct to_container::fn {
+  template <typename Rng> void impl(Rng, std::__bool_constant<false>);
+  template <typename Rng> void impl(Rng rng, std::__bool_constant<true>) {
+    size(rng);
+  }
+  template <typename Rng> container_t<MetaFn, Rng> operator()(Rng rng) {
+    using cont_t = container_t<MetaFn, Rng>;
+    using iter_t = Rng;
+    using use_reserve_t =
+        meta::bool_<to_container_reserve<cont_t, iter_t, Rng>>;
+    impl(rng, use_reserve_t{});
+  }
+};
+template <typename MetaFn, typename Fn>
+using to_container_closure = to_container::closure<MetaFn, Fn>;
+template <typename MetaFn>
+using to_container_fn = to_container_closure<MetaFn, to_container::fn<MetaFn>>;
+template <template <typename> class ContT> struct from_range {
+  template <typename Rng>
+  static auto from_rng_(long)
+      -> meta::invoke<meta::quote<ContT>, range_value_t<Rng>>;
+  template <typename Rng> using invoke = decltype(from_rng_<Rng>(0));
+};
+} // namespace detail
+detail::to_container_fn<detail::from_range<std::vector>> to_vector;
+template <typename Rng>
+struct remove_if_view
+    : view_adaptor<remove_if_view<Rng>, range_cardinality<Rng>::value> {};
+struct filter_base_fn {
+  template <typename Rng, typename Pred>
+  remove_if_view<Rng> operator()(Rng, Pred);
+  template <typename Pred> auto operator()(Pred pred) {
+    return make_view_closure(bind_back(filter_base_fn{}, pred));
+  }
+} filter;
+namespace detail {
+struct promote_as_signed_;
+template <typename I>
+using iota_difference_t =
+    meta::conditional_t<std::is_integral<I>::value, promote_as_signed_,
+                        with_difference_type_>;
+} // namespace detail
+template <typename, typename>
+struct iota_view : view_facade<iota_view<int, int>, unknown> {
+  struct cursor {
+    auto distance_to(cursor) -> detail::iota_difference_t<int>;
+  };
+  cursor begin_cursor();
+};
+struct {
+  template <typename From, typename To>
+    requires(std::is_signed<From>::value == std::is_signed<To>::value)
+  iota_view<From, To> operator()(From, To);
+} iota;
+} // namespace ranges
+void foo() {
+  ranges::iota(0, 1) | ranges::to_vector =
+      ranges::iota(0, 1) | ranges::filter([] {}) | ranges::to_vector;
+}
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index e5e081f..3b7c138 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++20 -ferror-limit 0 -verify %s
+// RUN: %clang_cc1 -std=c++20 -ferror-limit 0 -verify=expected,cxx20 %s
+// RUN: %clang_cc1 -std=c++2c -ferror-limit 0 -verify=expected %s
 
 namespace PR47043 {
   template<typename T> concept True = true;
@@ -1405,3 +1406,41 @@ static_assert(!std::is_constructible_v<span<4>, array<int, 3>>);
 }
 
 }
+
+
+namespace GH162125 {
+template<typename, int size>
+concept true_int = (size, true);
+
+template<typename, typename... Ts>
+concept true_types = true_int<void, sizeof...(Ts)>;
+
+template<typename, typename... Ts>
+concept true_types2 = true_int<void, Ts...[0]{1}>; // cxx20-warning {{pack indexing is a C++2c extension}}
+
+template<typename... Ts>
+struct s {
+  template<typename T> requires true_types<T, Ts...> && true_types2<T, Ts...>
+  static void f(T);
+};
+void(*test)(int) = &s<bool>::f<int>;
+}
+
+namespace GH162125_reversed {
+template<int size, typename>
+concept true_int = (size, true);
+
+template<typename, typename... Ts>
+concept true_types = true_int<sizeof...(Ts), void>;
+
+template<typename, typename... Ts>
+concept true_types2 = true_int<Ts...[0]{1}, void>; // cxx20-warning {{pack indexing is a C++2c extension}}
+
+template<typename... Ts>
+struct s {
+  template<typename T> requires true_types<T, Ts...> && true_types2<T, Ts...>
+  static void f(T);
+};
+
+void(*test)(int) = &s<bool>::f<int>;
+}
diff --git a/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp b/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp
index 85208fc..8a7d36d 100644
--- a/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp
+++ b/clang/test/Tooling/clang-linker-wrapper-spirv-elf.cpp
@@ -6,7 +6,7 @@
 // RUN: cd %t_tmp
 // RUN: %clangxx -fopenmp -fopenmp-targets=spirv64-intel -nogpulib -c -o %t_clang-linker-wrapper-spirv-elf.o %s
 // RUN: not clang-linker-wrapper -o a.out %t_clang-linker-wrapper-spirv-elf.o --save-temps --linker-path=ld
-// RUN: clang-offload-packager --image=triple=spirv64-intel,kind=openmp,file=%t.elf  %t_tmp/a.out.openmp.image.wrapper.o
+// RUN: llvm-offload-binary --image=triple=spirv64-intel,kind=openmp,file=%t.elf  %t_tmp/a.out.openmp.image.wrapper.o
 // RUN: llvm-readelf -h %t.elf | FileCheck -check-prefix=CHECK-MACHINE %s
 // RUN: llvm-readelf -t %t.elf | FileCheck -check-prefix=CHECK-SECTION %s
 // RUN: llvm-readelf -n %t.elf | FileCheck -check-prefix=CHECK-NOTES %s
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
index e6c79d7..29088ef 100644
--- a/clang/test/lit.cfg.py
+++ b/clang/test/lit.cfg.py
@@ -92,7 +92,7 @@ tools = [
     "clang-diff",
     "clang-format",
     "clang-repl",
-    "clang-offload-packager",
+    "llvm-offload-binary",
     "clang-tblgen",
     "clang-scan-deps",
     "clang-installapi",
diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index 50e3d69..7a7c56ae 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -14,7 +14,6 @@ add_clang_subdirectory(clang-fuzzer)
 add_clang_subdirectory(clang-import-test)
 add_clang_subdirectory(clang-linker-wrapper)
 add_clang_subdirectory(clang-nvlink-wrapper)
-add_clang_subdirectory(clang-offload-packager)
 add_clang_subdirectory(clang-offload-bundler)
 add_clang_subdirectory(clang-scan-deps)
 add_clang_subdirectory(clang-sycl-linker)
diff --git a/clang/tools/clang-offload-packager/CMakeLists.txt b/clang/tools/clang-offload-packager/CMakeLists.txt
deleted file mode 100644
index 1c29e37..0000000
--- a/clang/tools/clang-offload-packager/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-set(LLVM_LINK_COMPONENTS 
-  ${LLVM_TARGETS_TO_BUILD}
-  BinaryFormat
-  Object
-  Support)
-
-add_clang_tool(clang-offload-packager
-  ClangOffloadPackager.cpp
-
-  DEPENDS
-  ${tablegen_deps}
-  )
-
-clang_target_link_libraries(clang-offload-packager
-  PRIVATE
-  clangBasic
-  )
diff --git a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
deleted file mode 100644
index 64b058e..0000000
--- a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-//===-- clang-offload-packager/ClangOffloadPackager.cpp - file bundler ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===---------------------------------------------------------------------===//
-//
-// This tool takes several device object files and bundles them into a single
-// binary image using a custom binary format. This is intended to be used to
-// embed many device files into an application to create a fat binary.
-//
-//===---------------------------------------------------------------------===//
-
-#include "clang/Basic/Version.h"
-
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/BinaryFormat/Magic.h"
-#include "llvm/Object/ArchiveWriter.h"
-#include "llvm/Object/OffloadBinary.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Signals.h"
-#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/WithColor.h"
-
-using namespace llvm;
-using namespace llvm::object;
-
-static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
-
-static cl::OptionCategory
-    ClangOffloadPackagerCategory("clang-offload-packager options");
-
-static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
-                                       cl::value_desc("file"),
-                                       cl::cat(ClangOffloadPackagerCategory));
-
-static cl::opt<std::string> InputFile(cl::Positional,
-                                      cl::desc("Extract from <file>."),
-                                      cl::value_desc("file"),
-                                      cl::cat(ClangOffloadPackagerCategory));
-
-static cl::list<std::string>
-    DeviceImages("image",
-                 cl::desc("List of key and value arguments. Required keywords "
-                          "are 'file' and 'triple'."),
-                 cl::value_desc("<key>=<value>,..."),
-                 cl::cat(ClangOffloadPackagerCategory));
-
-static cl::opt<bool>
-    CreateArchive("archive",
-                  cl::desc("Write extracted files to a static archive"),
-                  cl::cat(ClangOffloadPackagerCategory));
-
-/// Path of the current binary.
-static const char *PackagerExecutable;
-
-static void PrintVersion(raw_ostream &OS) {
-  OS << clang::getClangToolFullVersion("clang-offload-packager") << '\n';
-}
-
-// Get a map containing all the arguments for the image. Repeated arguments will
-// be placed in a comma separated list.
-static DenseMap<StringRef, StringRef> getImageArguments(StringRef Image,
-                                                        StringSaver &Saver) {
-  DenseMap<StringRef, StringRef> Args;
-  for (StringRef Arg : llvm::split(Image, ",")) {
-    auto [Key, Value] = Arg.split("=");
-    auto [It, Inserted] = Args.try_emplace(Key, Value);
-    if (!Inserted)
-      It->second = Saver.save(It->second + "," + Value);
-  }
-
-  return Args;
-}
-
-static Error writeFile(StringRef Filename, StringRef Data) {
-  Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
-      FileOutputBuffer::create(Filename, Data.size());
-  if (!OutputOrErr)
-    return OutputOrErr.takeError();
-  std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
-  llvm::copy(Data, Output->getBufferStart());
-  if (Error E = Output->commit())
-    return E;
-  return Error::success();
-}
-
-static Error bundleImages() {
-  SmallVector<char, 1024> BinaryData;
-  raw_svector_ostream OS(BinaryData);
-  for (StringRef Image : DeviceImages) {
-    BumpPtrAllocator Alloc;
-    StringSaver Saver(Alloc);
-    DenseMap<StringRef, StringRef> Args = getImageArguments(Image, Saver);
-
-    if (!Args.count("triple") || !Args.count("file"))
-      return createStringError(
-          inconvertibleErrorCode(),
-          "'file' and 'triple' are required image arguments");
-
-    // Permit using multiple instances of `file` in a single string.
-    for (auto &File : llvm::split(Args["file"], ",")) {
-      OffloadBinary::OffloadingImage ImageBinary{};
-
-      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ObjectOrErr =
-          llvm::MemoryBuffer::getFileOrSTDIN(File);
-      if (std::error_code EC = ObjectOrErr.getError())
-        return errorCodeToError(EC);
-
-      // Clang uses the '.o' suffix for LTO bitcode.
-      if (identify_magic((*ObjectOrErr)->getBuffer()) == file_magic::bitcode)
-        ImageBinary.TheImageKind = object::IMG_Bitcode;
-      else
-        ImageBinary.TheImageKind =
-            getImageKind(sys::path::extension(File).drop_front());
-      ImageBinary.Image = std::move(*ObjectOrErr);
-      for (const auto &[Key, Value] : Args) {
-        if (Key == "kind") {
-          ImageBinary.TheOffloadKind = getOffloadKind(Value);
-        } else if (Key != "file") {
-          ImageBinary.StringData[Key] = Value;
-        }
-      }
-      llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary);
-      if (Buffer.size() % OffloadBinary::getAlignment() != 0)
-        return createStringError(inconvertibleErrorCode(),
-                                 "Offload binary has invalid size alignment");
-      OS << Buffer;
-    }
-  }
-
-  if (Error E = writeFile(OutputFile,
-                          StringRef(BinaryData.begin(), BinaryData.size())))
-    return E;
-  return Error::success();
-}
-
-static Error unbundleImages() {
-  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
-      MemoryBuffer::getFileOrSTDIN(InputFile);
-  if (std::error_code EC = BufferOrErr.getError())
-    return createFileError(InputFile, EC);
-  std::unique_ptr<MemoryBuffer> Buffer = std::move(*BufferOrErr);
-
-  // This data can be misaligned if extracted from an archive.
-  if (!isAddrAligned(Align(OffloadBinary::getAlignment()),
-                     Buffer->getBufferStart()))
-    Buffer = MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(),
-                                            Buffer->getBufferIdentifier());
-
-  SmallVector<OffloadFile> Binaries;
-  if (Error Err = extractOffloadBinaries(*Buffer, Binaries))
-    return Err;
-
-  // Try to extract each device image specified by the user from the input file.
-  for (StringRef Image : DeviceImages) {
-    BumpPtrAllocator Alloc;
-    StringSaver Saver(Alloc);
-    auto Args = getImageArguments(Image, Saver);
-
-    SmallVector<const OffloadBinary *> Extracted;
-    for (const OffloadFile &File : Binaries) {
-      const auto *Binary = File.getBinary();
-      // We handle the 'file' and 'kind' identifiers differently.
-      bool Match = llvm::all_of(Args, [&](auto &Arg) {
-        const auto [Key, Value] = Arg;
-        if (Key == "file")
-          return true;
-        if (Key == "kind")
-          return Binary->getOffloadKind() == getOffloadKind(Value);
-        return Binary->getString(Key) == Value;
-      });
-      if (Match)
-        Extracted.push_back(Binary);
-    }
-
-    if (Extracted.empty())
-      continue;
-
-    if (CreateArchive) {
-      if (!Args.count("file"))
-        return createStringError(inconvertibleErrorCode(),
-                                 "Image must have a 'file' argument.");
-
-      SmallVector<NewArchiveMember> Members;
-      for (const OffloadBinary *Binary : Extracted)
-        Members.emplace_back(MemoryBufferRef(
-            Binary->getImage(),
-            Binary->getMemoryBufferRef().getBufferIdentifier()));
-
-      if (Error E = writeArchive(
-              Args["file"], Members, SymtabWritingMode::NormalSymtab,
-              Archive::getDefaultKind(), true, false, nullptr))
-        return E;
-    } else if (auto It = Args.find("file"); It != Args.end()) {
-      if (Extracted.size() > 1)
-        WithColor::warning(errs(), PackagerExecutable)
-            << "Multiple inputs match to a single file, '" << It->second
-            << "'\n";
-      if (Error E = writeFile(It->second, Extracted.back()->getImage()))
-        return E;
-    } else {
-      uint64_t Idx = 0;
-      for (const OffloadBinary *Binary : Extracted) {
-        StringRef Filename =
-            Saver.save(sys::path::stem(InputFile) + "-" + Binary->getTriple() +
-                       "-" + Binary->getArch() + "." + std::to_string(Idx++) +
-                       "." + getImageKindName(Binary->getImageKind()));
-        if (Error E = writeFile(Filename, Binary->getImage()))
-          return E;
-      }
-    }
-  }
-
-  return Error::success();
-}
-
-int main(int argc, const char **argv) {
-  sys::PrintStackTraceOnErrorSignal(argv[0]);
-  cl::HideUnrelatedOptions(ClangOffloadPackagerCategory);
-  cl::SetVersionPrinter(PrintVersion);
-  cl::ParseCommandLineOptions(
-      argc, argv,
-      "A utility for bundling several object files into a single binary.\n"
-      "The output binary can then be embedded into the host section table\n"
-      "to create a fatbinary containing offloading code.\n");
-
-  if (Help) {
-    cl::PrintHelpMessage();
-    return EXIT_SUCCESS;
-  }
-
-  PackagerExecutable = argv[0];
-  auto reportError = [argv](Error E) {
-    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
-    return EXIT_FAILURE;
-  };
-
-  if (!InputFile.empty() && !OutputFile.empty())
-    return reportError(
-        createStringError(inconvertibleErrorCode(),
-                          "Packaging to an output file and extracting from an "
-                          "input file are mutually exclusive."));
-
-  if (!OutputFile.empty()) {
-    if (Error Err = bundleImages())
-      return reportError(std::move(Err));
-  } else if (!InputFile.empty()) {
-    if (Error Err = unbundleImages())
-      return reportError(std::move(Err));
-  }
-
-  return EXIT_SUCCESS;
-}