Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/AST/ASTContext.cpp | 73
-rw-r--r--  clang/lib/AST/ByteCode/InterpBuiltin.cpp | 68
-rw-r--r--  clang/lib/AST/DeclPrinter.cpp | 14
-rw-r--r--  clang/lib/AST/DeclTemplate.cpp | 67
-rw-r--r--  clang/lib/AST/ExprConstant.cpp | 95
-rw-r--r--  clang/lib/Analysis/FlowSensitive/RecordOps.cpp | 42
-rw-r--r--  clang/lib/Analysis/LifetimeSafety.cpp | 505
-rw-r--r--  clang/lib/Analysis/LifetimeSafety.md | 230
-rw-r--r--  clang/lib/Basic/Diagnostic.cpp | 35
-rw-r--r--  clang/lib/Basic/SanitizerSpecialCaseList.cpp | 11
-rw-r--r--  clang/lib/Basic/Targets/AMDGPU.h | 7
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp | 18
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenCXXABI.h | 16
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenClass.cpp | 2
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenCleanup.cpp | 6
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenCleanup.h | 9
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenException.cpp | 35
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp | 30
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 132
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 247
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 16
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h | 10
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp | 54
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenValue.h | 7
-rw-r--r--  clang/lib/CIR/CodeGen/EHScopeStack.h | 4
-rw-r--r--  clang/lib/CIR/Dialect/IR/CIRAttrs.cpp | 43
-rw-r--r--  clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 4
-rw-r--r--  clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 144
-rw-r--r--  clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 67
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp | 117
-rw-r--r--  clang/lib/CodeGen/CGExprAgg.cpp | 146
-rw-r--r--  clang/lib/CodeGen/CGExprCXX.cpp | 15
-rw-r--r--  clang/lib/CodeGen/CGExprScalar.cpp | 51
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp | 31
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 6
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.cpp | 2
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h | 9
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp | 13
-rw-r--r--  clang/lib/Driver/Action.cpp | 2
-rw-r--r--  clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 7
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.cpp | 43
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.h | 2
-rw-r--r--  clang/lib/Frontend/CompilerInvocation.cpp | 53
-rw-r--r--  clang/lib/Headers/avx512fp16intrin.h | 11
-rw-r--r--  clang/lib/Headers/avx512vlfp16intrin.h | 6
-rw-r--r--  clang/lib/Headers/opencl-c-base.h | 10
-rw-r--r--  clang/lib/Sema/SemaChecking.cpp | 4
-rw-r--r--  clang/lib/Sema/SemaConcept.cpp | 26
-rw-r--r--  clang/lib/Sema/SemaDeclCXX.cpp | 42
-rw-r--r--  clang/lib/Sema/SemaHLSL.cpp | 45
-rw-r--r--  clang/lib/Sema/SemaInit.cpp | 48
-rw-r--r--  clang/lib/Sema/SemaOverload.cpp | 4
-rw-r--r--  clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 9
-rw-r--r--  clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp | 6
-rw-r--r--  clang/lib/Testing/CommandLineArgs.cpp | 3
-rw-r--r--  clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp | 2
-rw-r--r--  clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp | 4
57 files changed, 2029 insertions(+), 679 deletions(-)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 056bfe3..a8b41ba 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -330,76 +330,6 @@ void ASTContext::addComment(const RawComment &RC) {
Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc);
}
-/// If we have a 'templated' declaration for a template, adjust 'D' to
-/// refer to the actual template.
-/// If we have an implicit instantiation, adjust 'D' to refer to template.
-static const Decl &adjustDeclToTemplate(const Decl &D) {
- if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
- // Is this function declaration part of a function template?
- if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
- return *FTD;
-
- // Nothing to do if function is not an implicit instantiation.
- if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
- return D;
-
- // Function is an implicit instantiation of a function template?
- if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
- return *FTD;
-
- // Function is instantiated from a member definition of a class template?
- if (const FunctionDecl *MemberDecl =
- FD->getInstantiatedFromMemberFunction())
- return *MemberDecl;
-
- return D;
- }
- if (const auto *VD = dyn_cast<VarDecl>(&D)) {
- // Static data member is instantiated from a member definition of a class
- // template?
- if (VD->isStaticDataMember())
- if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
- return *MemberDecl;
-
- return D;
- }
- if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
- // Is this class declaration part of a class template?
- if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
- return *CTD;
-
- // Class is an implicit instantiation of a class template or partial
- // specialization?
- if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
- if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
- return D;
- llvm::PointerUnion<ClassTemplateDecl *,
- ClassTemplatePartialSpecializationDecl *>
- PU = CTSD->getSpecializedTemplateOrPartial();
- return isa<ClassTemplateDecl *>(PU)
- ? *static_cast<const Decl *>(cast<ClassTemplateDecl *>(PU))
- : *static_cast<const Decl *>(
- cast<ClassTemplatePartialSpecializationDecl *>(PU));
- }
-
- // Class is instantiated from a member definition of a class template?
- if (const MemberSpecializationInfo *Info =
- CRD->getMemberSpecializationInfo())
- return *Info->getInstantiatedFrom();
-
- return D;
- }
- if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
- // Enum is instantiated from a member definition of a class template?
- if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
- return *MemberDecl;
-
- return D;
- }
- // FIXME: Adjust alias templates?
- return D;
-}
-
const RawComment *ASTContext::getRawCommentForAnyRedecl(
const Decl *D,
const Decl **OriginalDecl) const {
@@ -976,6 +906,9 @@ void ASTContext::cleanup() {
for (const auto &Value : ModuleInitializers)
Value.second->~PerModuleInitializers();
ModuleInitializers.clear();
+
+ XRayFilter.reset();
+ NoSanitizeL.reset();
}
ASTContext::~ASTContext() { cleanup(); }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a3c4ba5..1eea813 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2314,10 +2314,14 @@ static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
if (Ptr.isBaseClass())
ByteOffset = computePointerOffset(ASTCtx, Ptr.getBase()) -
computePointerOffset(ASTCtx, Ptr);
- else
- ByteOffset =
- computePointerOffset(ASTCtx, Ptr) -
- computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+ else {
+ if (Ptr.inArray())
+ ByteOffset =
+ computePointerOffset(ASTCtx, Ptr) -
+ computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+ else
+ ByteOffset = 0;
+ }
} else
ByteOffset = computePointerOffset(ASTCtx, Ptr);
@@ -2897,7 +2901,49 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
});
Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, bool MaskZ) {
+ assert(Call->getNumArgs() == 5);
+ APInt U = popToAPSInt(S, Call->getArg(4)); // Lane mask
+ APInt Imm = popToAPSInt(S, Call->getArg(3)); // Ternary truth table
+ const Pointer &C = S.Stk.pop<Pointer>();
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned DstLen = A.getNumElems();
+ const QualType ElemQT = getElemType(A);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+ bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ for (unsigned I = 0; I != DstLen; ++I) {
+ APInt ALane = A.elem<T>(I).toAPSInt();
+ APInt BLane = B.elem<T>(I).toAPSInt();
+ APInt CLane = C.elem<T>(I).toAPSInt();
+ APInt RLane(LaneWidth, 0);
+ if (U[I]) { // If lane not masked, compute ternary logic.
+ for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+ unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
+ RLane.setBitVal(Bit, Imm[Idx]);
+ }
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
+ } else if (MaskZ) { // If zero masked, zero the lane.
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
+ } else { // Just masked, put in A lane.
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
+ }
+ }
+ });
+ Dst.initializeAllElements();
return true;
}
@@ -3760,6 +3806,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 7001ade..7f3dcca 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -111,6 +111,7 @@ namespace {
void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
void VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *TTP);
void VisitNonTypeTemplateParmDecl(const NonTypeTemplateParmDecl *NTTP);
+ void VisitTemplateTemplateParmDecl(const TemplateTemplateParmDecl *);
void VisitHLSLBufferDecl(HLSLBufferDecl *D);
void VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D);
@@ -1189,8 +1190,7 @@ void DeclPrinter::printTemplateParameters(const TemplateParameterList *Params,
} else if (auto NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
VisitNonTypeTemplateParmDecl(NTTP);
} else if (auto TTPD = dyn_cast<TemplateTemplateParmDecl>(Param)) {
- VisitTemplateDecl(TTPD);
- // FIXME: print the default argument, if present.
+ VisitTemplateTemplateParmDecl(TTPD);
}
}
@@ -1916,6 +1916,16 @@ void DeclPrinter::VisitNonTypeTemplateParmDecl(
}
}
+void DeclPrinter::VisitTemplateTemplateParmDecl(
+ const TemplateTemplateParmDecl *TTPD) {
+ VisitTemplateDecl(TTPD);
+ if (TTPD->hasDefaultArgument() && !TTPD->defaultArgumentWasInherited()) {
+ Out << " = ";
+ TTPD->getDefaultArgument().getArgument().print(Policy, Out,
+ /*IncludeType=*/false);
+ }
+}
+
void DeclPrinter::VisitOpenACCDeclareDecl(OpenACCDeclareDecl *D) {
if (!D->isInvalidDecl()) {
Out << "#pragma acc declare";
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index b6bb611..e5fba1b 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -1708,3 +1708,70 @@ TemplateParameterList *clang::getReplacedTemplateParameterList(const Decl *D) {
llvm_unreachable("Unhandled templated declaration kind");
}
}
+
+const Decl &clang::adjustDeclToTemplate(const Decl &D) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
+ // Is this function declaration part of a function template?
+ if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
+ return *FTD;
+
+ // Nothing to do if function is not an implicit instantiation.
+ if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
+ return D;
+
+ // Function is an implicit instantiation of a function template?
+ if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
+ return *FTD;
+
+ // Function is instantiated from a member definition of a class template?
+ if (const FunctionDecl *MemberDecl =
+ FD->getInstantiatedFromMemberFunction())
+ return *MemberDecl;
+
+ return D;
+ }
+ if (const auto *VD = dyn_cast<VarDecl>(&D)) {
+ // Static data member is instantiated from a member definition of a class
+ // template?
+ if (VD->isStaticDataMember())
+ if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
+ return *MemberDecl;
+
+ return D;
+ }
+ if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
+ // Is this class declaration part of a class template?
+ if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
+ return *CTD;
+
+ // Class is an implicit instantiation of a class template or partial
+ // specialization?
+ if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
+ if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
+ return D;
+ llvm::PointerUnion<ClassTemplateDecl *,
+ ClassTemplatePartialSpecializationDecl *>
+ PU = CTSD->getSpecializedTemplateOrPartial();
+ return isa<ClassTemplateDecl *>(PU)
+ ? *static_cast<const Decl *>(cast<ClassTemplateDecl *>(PU))
+ : *static_cast<const Decl *>(
+ cast<ClassTemplatePartialSpecializationDecl *>(PU));
+ }
+
+ // Class is instantiated from a member definition of a class template?
+ if (const MemberSpecializationInfo *Info =
+ CRD->getMemberSpecializationInfo())
+ return *Info->getInstantiatedFrom();
+
+ return D;
+ }
+ if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
+ // Enum is instantiated from a member definition of a class template?
+ if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
+ return *MemberDecl;
+
+ return D;
+ }
+ // FIXME: Adjust alias templates?
+ return D;
+}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7bf28d9..618e163 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12168,6 +12168,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask: {
+ APValue AValue, BValue, CValue, ImmValue, UValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ if (U[EltNum]) {
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ } else {
+ ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned)));
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz: {
+ APValue AValue, BValue, CValue, ImmValue, UValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ if (U[EltNum]) {
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
case Builtin::BI__builtin_elementwise_clzg:
case Builtin::BI__builtin_elementwise_ctzg: {
APValue SourceLHS;
@@ -14265,7 +14356,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
!EvaluateInteger(E->getArg(1), Amt, Info))
return false;
- return Success(Val.rotl(Amt.urem(Val.getBitWidth())), E);
+ return Success(Val.rotl(Amt), E);
}
case Builtin::BI__builtin_rotateright8:
@@ -14282,7 +14373,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
!EvaluateInteger(E->getArg(1), Amt, Info))
return false;
- return Success(Val.rotr(Amt.urem(Val.getBitWidth())), E);
+ return Success(Val.rotr(Amt), E);
}
case Builtin::BI__builtin_elementwise_add_sat: {
diff --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
index ed827ac..03d6ed8 100644
--- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
+++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
@@ -14,6 +14,9 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
#define DEBUG_TYPE "dataflow"
@@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst,
if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr &&
SrcDecl->isDerivedFrom(DstDecl))) {
+  // Dst may have children modeled from derived types other than SrcType, e.g.
+ // after casts of Dst to other types derived from DstType. Only copy the
+ // children and synthetic fields present in both Dst and SrcType.
+ const FieldSet FieldsInSrcType =
+ Env.getDataflowAnalysisContext().getModeledFields(SrcType);
for (auto [Field, DstFieldLoc] : Dst.children())
- copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+ if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+ FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl))
+ copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+ const llvm::StringMap<QualType> SyntheticFieldsForSrcType =
+ Env.getDataflowAnalysisContext().getSyntheticFields(SrcType);
for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields())
- copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
- *DstFieldLoc, Env);
+ if (SyntheticFieldsForSrcType.contains(Name))
+ copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
+ *DstFieldLoc, Env);
} else if (SrcDecl != nullptr && DstDecl != nullptr &&
DstDecl->isDerivedFrom(SrcDecl)) {
- for (auto [Field, SrcFieldLoc] : Src.children())
- copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
- for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields())
- copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
- Dst.getSyntheticField(Name), Env);
+  // Src may have children modeled from derived types other than DstType, e.g.
+ // after other casts of Src to those types (likely in different branches,
+ // but without flow-condition-dependent field modeling). Only copy the
+ // children and synthetic fields of Src that are present in DstType.
+ const FieldSet FieldsInDstType =
+ Env.getDataflowAnalysisContext().getModeledFields(DstType);
+ for (auto [Field, SrcFieldLoc] : Src.children()) {
+ if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+ FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl))
+ copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
+ }
+ const llvm::StringMap<QualType> SyntheticFieldsForDstType =
+ Env.getDataflowAnalysisContext().getSyntheticFields(DstType);
+ for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) {
+ if (SyntheticFieldsForDstType.contains(Name))
+ copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
+ Dst.getSyntheticField(Name), Env);
+ }
} else {
for (const FieldDecl *Field :
Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) {
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index c18b8fb..6196ec3 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -19,12 +19,13 @@
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdint>
#include <memory>
+#include <optional>
namespace clang::lifetimes {
namespace internal {
@@ -872,22 +873,19 @@ public:
InStates[Start] = D.getInitialState();
W.enqueueBlock(Start);
- llvm::SmallBitVector Visited(Cfg.getNumBlockIDs() + 1);
-
while (const CFGBlock *B = W.dequeue()) {
- Lattice StateIn = getInState(B);
+ Lattice StateIn = *getInState(B);
Lattice StateOut = transferBlock(B, StateIn);
OutStates[B] = StateOut;
- Visited.set(B->getBlockID());
for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) {
if (!AdjacentB)
continue;
- Lattice OldInState = getInState(AdjacentB);
- Lattice NewInState = D.join(OldInState, StateOut);
+ std::optional<Lattice> OldInState = getInState(AdjacentB);
+ Lattice NewInState =
+ !OldInState ? StateOut : D.join(*OldInState, StateOut);
// Enqueue the adjacent block if its in-state has changed or if we have
- // never visited it.
- if (!Visited.test(AdjacentB->getBlockID()) ||
- NewInState != OldInState) {
+ // never seen it.
+ if (!OldInState || NewInState != *OldInState) {
InStates[AdjacentB] = NewInState;
W.enqueueBlock(AdjacentB);
}
@@ -898,7 +896,12 @@ public:
protected:
Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
- Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); }
+ std::optional<Lattice> getInState(const CFGBlock *B) const {
+ auto It = InStates.find(B);
+ if (It == InStates.end())
+ return std::nullopt;
+ return It->second;
+ }
Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); }
@@ -974,19 +977,21 @@ static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A,
return A;
}
-/// Checks if set A is a subset of set B.
-template <typename T>
-static bool isSubsetOf(const llvm::ImmutableSet<T> &A,
- const llvm::ImmutableSet<T> &B) {
- // Empty set is a subset of all sets.
- if (A.isEmpty())
- return true;
-
- for (const T &Elem : A)
- if (!B.contains(Elem))
- return false;
- return true;
-}
+/// Describes the strategy for joining two `ImmutableMap` instances, primarily
+/// differing in how they handle keys that are unique to one of the maps.
+///
+/// A `Symmetric` join is universally correct, while an `Asymmetric` join
+/// serves as a performance optimization. The latter is applicable only when the
+/// join operation possesses a left identity element, allowing for a more
+/// efficient, one-sided merge.
+enum class JoinKind {
+ /// A symmetric join applies the `JoinValues` operation to keys unique to
+ /// either map, ensuring that values from both maps contribute to the result.
+ Symmetric,
+ /// An asymmetric join preserves keys unique to the first map as-is, while
+ /// applying the `JoinValues` operation only to keys unique to the second map.
+ Asymmetric,
+};
/// Computes the key-wise union of two ImmutableMaps.
// TODO(opt): This key-wise join is a performance bottleneck. A more
@@ -994,22 +999,29 @@ static bool isSubsetOf(const llvm::ImmutableSet<T> &A,
// instead of the current AVL-tree-based ImmutableMap.
template <typename K, typename V, typename Joiner>
static llvm::ImmutableMap<K, V>
-join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B,
- typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues) {
+join(const llvm::ImmutableMap<K, V> &A, const llvm::ImmutableMap<K, V> &B,
+ typename llvm::ImmutableMap<K, V>::Factory &F, Joiner JoinValues,
+ JoinKind Kind) {
if (A.getHeight() < B.getHeight())
- std::swap(A, B);
+ return join(B, A, F, JoinValues, Kind);
// For each element in B, join it with the corresponding element in A
// (or with an empty value if it doesn't exist in A).
+ llvm::ImmutableMap<K, V> Res = A;
for (const auto &Entry : B) {
const K &Key = Entry.first;
const V &ValB = Entry.second;
- if (const V *ValA = A.lookup(Key))
- A = F.add(A, Key, JoinValues(*ValA, ValB));
- else
- A = F.add(A, Key, ValB);
+ Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB));
+ }
+ if (Kind == JoinKind::Symmetric) {
+ for (const auto &Entry : A) {
+ const K &Key = Entry.first;
+ const V &ValA = Entry.second;
+ if (!B.contains(Key))
+ Res = F.add(Res, Key, JoinValues(&ValA, nullptr));
+ }
}
- return A;
+ return Res;
}
} // namespace utils
@@ -1017,19 +1029,6 @@ join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B,
// Loan Propagation Analysis
// ========================================================================= //
-using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
-using ExpiredLoanMap = llvm::ImmutableMap<LoanID, const ExpireFact *>;
-
-/// An object to hold the factories for immutable collections, ensuring
-/// that all created states share the same underlying memory management.
-struct LifetimeFactory {
- llvm::BumpPtrAllocator Allocator;
- OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false};
- LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false};
- ExpiredLoanMap::Factory ExpiredLoanMapFactory{Allocator,
- /*canonicalize=*/false};
-};
-
/// Represents the dataflow lattice for loan propagation.
///
/// This lattice tracks which loans each origin may hold at a given program
@@ -1073,10 +1072,10 @@ class LoanPropagationAnalysis
public:
LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
- LifetimeFactory &LFactory)
- : DataflowAnalysis(C, AC, F),
- OriginLoanMapFactory(LFactory.OriginMapFactory),
- LoanSetFactory(LFactory.LoanSetFactory) {}
+ OriginLoanMap::Factory &OriginLoanMapFactory,
+ LoanSet::Factory &LoanSetFactory)
+ : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory),
+ LoanSetFactory(LoanSetFactory) {}
using Base::transfer;
@@ -1087,11 +1086,19 @@ public:
/// Merges two lattices by taking the union of loans for each origin.
// TODO(opt): Keep the state small by removing origins which become dead.
Lattice join(Lattice A, Lattice B) {
- OriginLoanMap JoinedOrigins =
- utils::join(A.Origins, B.Origins, OriginLoanMapFactory,
- [&](LoanSet S1, LoanSet S2) {
- return utils::join(S1, S2, LoanSetFactory);
- });
+ OriginLoanMap JoinedOrigins = utils::join(
+ A.Origins, B.Origins, OriginLoanMapFactory,
+ [&](const LoanSet *S1, const LoanSet *S2) {
+ assert((S1 || S2) && "unexpectedly merging 2 empty sets");
+ if (!S1)
+ return *S2;
+ if (!S2)
+ return *S1;
+ return utils::join(*S1, *S2, LoanSetFactory);
+ },
+ // Asymmetric join is a performance win. For origins present only on one
+ // branch, the loan set can be carried over as-is.
+ utils::JoinKind::Asymmetric);
return Lattice(JoinedOrigins);
}
@@ -1120,12 +1127,12 @@ public:
OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans));
}
- LoanSet getLoans(OriginID OID, ProgramPoint P) {
+ LoanSet getLoans(OriginID OID, ProgramPoint P) const {
return getLoans(getState(P), OID);
}
private:
- LoanSet getLoans(Lattice L, OriginID OID) {
+ LoanSet getLoans(Lattice L, OriginID OID) const {
if (auto *Loans = L.Origins.lookup(OID))
return *Loans;
return LoanSetFactory.getEmptySet();
@@ -1133,96 +1140,195 @@ private:
};
// ========================================================================= //
-// Expired Loans Analysis
+// Live Origins Analysis
+// ========================================================================= //
+//
+// A backward dataflow analysis that determines which origins are "live" at each
+// program point. An origin is "live" at a program point if there's a potential
+// future use of the pointer it represents. Liveness is "generated" by a read
+// of the origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops
+// being live) when its loan set is overwritten (e.g., an OriginFlow killing
+// the destination origin).
+//
+// This information is used for detecting use-after-free errors, as it allows us
+// to check if a live origin holds a loan to an object that has already expired.
// ========================================================================= //
-/// The dataflow lattice for tracking the set of expired loans.
-struct ExpiredLattice {
- /// Map from an expired `LoanID` to the `ExpireFact` that made it expire.
- ExpiredLoanMap Expired;
+/// Information about why an origin is live at a program point.
+struct LivenessInfo {
+ /// The use that makes the origin live. If liveness is propagated from
+ /// multiple uses along different paths, this will point to the use appearing
+ /// earlier in the translation unit.
+ /// This is 'null' when the origin is not live.
+ const UseFact *CausingUseFact;
+ /// The kind of liveness of the origin.
+ /// `Must`: The origin is live on all control-flow paths from the current
+ /// point to the function's exit (i.e. the current point is dominated by a set
+ /// of uses).
+ /// `Maybe`: indicates it is live on some but not all paths.
+ ///
+ /// This determines the diagnostic's confidence level.
+ /// `Must`-be-alive at expiration implies a definite use-after-free,
+ /// while `Maybe`-be-alive suggests a potential one on some paths.
+ LivenessKind Kind;
+
+ LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {}
+ LivenessInfo(const UseFact *UF, LivenessKind K)
+ : CausingUseFact(UF), Kind(K) {}
+
+ bool operator==(const LivenessInfo &Other) const {
+ return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind;
+ }
+ bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); }
+
+ void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+ IDBuilder.AddPointer(CausingUseFact);
+ IDBuilder.Add(Kind);
+ }
+};
+
+using LivenessMap = llvm::ImmutableMap<OriginID, LivenessInfo>;
- ExpiredLattice() : Expired(nullptr) {};
- explicit ExpiredLattice(ExpiredLoanMap M) : Expired(M) {}
+/// The dataflow lattice for origin liveness analysis.
+/// It tracks which origins are live, why they're live (which UseFact),
+/// and the confidence level of that liveness.
+struct LivenessLattice {
+ LivenessMap LiveOrigins;
- bool operator==(const ExpiredLattice &Other) const {
- return Expired == Other.Expired;
+ LivenessLattice() : LiveOrigins(nullptr) {};
+
+ explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {}
+
+ bool operator==(const LivenessLattice &Other) const {
+ return LiveOrigins == Other.LiveOrigins;
}
- bool operator!=(const ExpiredLattice &Other) const {
+
+ bool operator!=(const LivenessLattice &Other) const {
return !(*this == Other);
}
- void dump(llvm::raw_ostream &OS) const {
- OS << "ExpiredLattice State:\n";
- if (Expired.isEmpty())
+ void dump(llvm::raw_ostream &OS, const OriginManager &OM) const {
+ if (LiveOrigins.isEmpty())
OS << " <empty>\n";
- for (const auto &[ID, _] : Expired)
- OS << " Loan " << ID << " is expired\n";
+ for (const auto &Entry : LiveOrigins) {
+ OriginID OID = Entry.first;
+ const LivenessInfo &Info = Entry.second;
+ OS << " ";
+ OM.dump(OID, OS);
+ OS << " is ";
+ switch (Info.Kind) {
+ case LivenessKind::Must:
+ OS << "definitely";
+ break;
+ case LivenessKind::Maybe:
+ OS << "maybe";
+ break;
+ case LivenessKind::Dead:
+ llvm_unreachable("liveness kind of live origins should not be dead.");
+ }
+ OS << " live at this point\n";
+ }
}
};
-/// The analysis that tracks which loans have expired.
-class ExpiredLoansAnalysis
- : public DataflowAnalysis<ExpiredLoansAnalysis, ExpiredLattice,
- Direction::Forward> {
-
- ExpiredLoanMap::Factory &Factory;
+/// The analysis that tracks which origins are live, with granular information
+/// about the causing use fact and confidence level. This is a backward
+/// analysis.
+class LiveOriginAnalysis
+ : public DataflowAnalysis<LiveOriginAnalysis, LivenessLattice,
+ Direction::Backward> {
+ FactManager &FactMgr;
+ LivenessMap::Factory &Factory;
public:
- ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
- LifetimeFactory &Factory)
- : DataflowAnalysis(C, AC, F), Factory(Factory.ExpiredLoanMapFactory) {}
-
- using Base::transfer;
+ LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+ LivenessMap::Factory &SF)
+ : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {}
+ using DataflowAnalysis<LiveOriginAnalysis, Lattice,
+ Direction::Backward>::transfer;
- StringRef getAnalysisName() const { return "ExpiredLoans"; }
+ StringRef getAnalysisName() const { return "LiveOrigins"; }
Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); }
- /// Merges two lattices by taking the union of the two expired loans.
- Lattice join(Lattice L1, Lattice L2) {
- return Lattice(
- utils::join(L1.Expired, L2.Expired, Factory,
- // Take the last expiry fact to make this hermetic.
- [](const ExpireFact *F1, const ExpireFact *F2) {
- return F1->getExpiryLoc() > F2->getExpiryLoc() ? F1 : F2;
- }));
- }
-
- Lattice transfer(Lattice In, const ExpireFact &F) {
- return Lattice(Factory.add(In.Expired, F.getLoanID(), &F));
- }
-
- // Removes the loan from the set of expired loans.
- //
- // When a loan is re-issued (e.g., in a loop), it is no longer considered
- // expired. A loan can be in the expired set at the point of issue due to
- // the dataflow state from a previous loop iteration being propagated along
- // a backedge in the CFG.
- //
- // Note: This has a subtle false-negative though where a loan from previous
- // iteration is not overwritten by a reissue. This needs careful tracking
- // of loans "across iterations" which can be considered for future
- // enhancements.
- //
- // void foo(int safe) {
- // int* p = &safe;
- // int* q = &safe;
- // while (condition()) {
- // int x = 1;
- // p = &x; // A loan to 'x' is issued to 'p' in every iteration.
- // if (condition()) {
- // q = p;
- // }
- // (void)*p; // OK — 'p' points to 'x' from new iteration.
- // (void)*q; // UaF - 'q' still points to 'x' from previous iteration
- // // which is now destroyed.
- // }
- // }
- Lattice transfer(Lattice In, const IssueFact &F) {
- return Lattice(Factory.remove(In.Expired, F.getLoanID()));
+ /// Merges two lattices by combining liveness information.
+ /// When the same origin has different confidence levels, we take the lower
+ /// one.
+ Lattice join(Lattice L1, Lattice L2) const {
+ LivenessMap Merged = L1.LiveOrigins;
+ // Take the earliest UseFact to make the join hermetic and commutative.
+ auto CombineUseFact = [](const UseFact &A,
+ const UseFact &B) -> const UseFact * {
+ return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A
+ : &B;
+ };
+ auto CombineLivenessKind = [](LivenessKind K1,
+ LivenessKind K2) -> LivenessKind {
+ assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead.");
+ assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead.");
+ // Only return "Must" if both paths are "Must", otherwise Maybe.
+ if (K1 == LivenessKind::Must && K2 == LivenessKind::Must)
+ return LivenessKind::Must;
+ return LivenessKind::Maybe;
+ };
+ auto CombineLivenessInfo = [&](const LivenessInfo *L1,
+ const LivenessInfo *L2) -> LivenessInfo {
+ assert((L1 || L2) && "unexpectedly merging 2 empty sets");
+ if (!L1)
+ return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe);
+ if (!L2)
+ return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe);
+ return LivenessInfo(
+ CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact),
+ CombineLivenessKind(L1->Kind, L2->Kind));
+ };
+ return Lattice(utils::join(
+ L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo,
+ // A symmetric join is required here. If an origin is live on one
+ // branch but not the other, its confidence must be demoted to `Maybe`.
+ utils::JoinKind::Symmetric));
+ }
+
+ /// A read operation makes the origin live with definite confidence, as it
+ /// dominates this program point. A write operation kills the liveness of
+ /// the origin since it overwrites the value.
+ Lattice transfer(Lattice In, const UseFact &UF) {
+ OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr());
+ // Write kills liveness.
+ if (UF.isWritten())
+ return Lattice(Factory.remove(In.LiveOrigins, OID));
+ // Read makes origin live with definite confidence (dominates this point).
+ return Lattice(Factory.add(In.LiveOrigins, OID,
+ LivenessInfo(&UF, LivenessKind::Must)));
+ }
+
+ /// Issuing a new loan to an origin kills its liveness.
+ Lattice transfer(Lattice In, const IssueFact &IF) {
+ return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID()));
}
- ExpiredLoanMap getExpiredLoans(ProgramPoint P) { return getState(P).Expired; }
+ /// An OriginFlow kills the liveness of the destination origin if `KillDest`
+ /// is true. Otherwise, it propagates liveness from destination to source.
+ Lattice transfer(Lattice In, const OriginFlowFact &OF) {
+ if (!OF.getKillDest())
+ return In;
+ return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID()));
+ }
+
+ LivenessMap getLiveOrigins(ProgramPoint P) const {
+ return getState(P).LiveOrigins;
+ }
+
+ // Dump liveness values on all test points in the program.
+ void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << getAnalysisName() << " results:\n";
+ llvm::dbgs() << "==========================================\n";
+ for (const auto &Entry : LSA.getTestPoints()) {
+ OS << "TestPoint: " << Entry.getKey() << "\n";
+ getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr());
+ }
+ }
};
// ========================================================================= //
@@ -1240,84 +1346,75 @@ class LifetimeChecker {
private:
llvm::DenseMap<LoanID, PendingWarning> FinalWarningsMap;
LoanPropagationAnalysis &LoanPropagation;
- ExpiredLoansAnalysis &ExpiredLoans;
+ LiveOriginAnalysis &LiveOrigins;
FactManager &FactMgr;
AnalysisDeclContext &ADC;
LifetimeSafetyReporter *Reporter;
public:
- LifetimeChecker(LoanPropagationAnalysis &LPA, ExpiredLoansAnalysis &ELA,
+ LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA,
FactManager &FM, AnalysisDeclContext &ADC,
LifetimeSafetyReporter *Reporter)
- : LoanPropagation(LPA), ExpiredLoans(ELA), FactMgr(FM), ADC(ADC),
+ : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC),
Reporter(Reporter) {}
void run() {
llvm::TimeTraceScope TimeProfile("LifetimeChecker");
for (const CFGBlock *B : *ADC.getAnalysis<PostOrderCFGView>())
for (const Fact *F : FactMgr.getFacts(B))
- if (const auto *UF = F->getAs<UseFact>())
- checkUse(UF);
+ if (const auto *EF = F->getAs<ExpireFact>())
+ checkExpiry(EF);
issuePendingWarnings();
}
- /// Checks for use-after-free errors for a given use of an Origin.
+ /// Checks for use-after-free errors when a loan expires.
///
- /// This method is called for each 'UseFact' identified in the control flow
- /// graph. It determines if the loans held by the used origin have expired
- /// at the point of use.
- void checkUse(const UseFact *UF) {
- if (UF->isWritten())
- return;
- OriginID O = UF->getUsedOrigin(FactMgr.getOriginMgr());
-
- // Get the set of loans that the origin might hold at this program point.
- LoanSet HeldLoans = LoanPropagation.getLoans(O, UF);
-
- // Get the set of all loans that have expired at this program point.
- ExpiredLoanMap AllExpiredLoans = ExpiredLoans.getExpiredLoans(UF);
-
- // If the pointer holds no loans or no loans have expired, there's nothing
- // to check.
- if (HeldLoans.isEmpty() || AllExpiredLoans.isEmpty())
- return;
-
- // Identify loans that which have expired but are held by the pointer. Using
- // them is a use-after-free.
- llvm::SmallVector<LoanID> DefaultedLoans;
- // A definite UaF error occurs if all loans the origin might hold have
- // expired.
- bool IsDefiniteError = true;
- for (LoanID L : HeldLoans) {
- if (AllExpiredLoans.contains(L))
- DefaultedLoans.push_back(L);
- else
- // If at least one loan is not expired, this use is not a definite UaF.
- IsDefiniteError = false;
+ /// This method examines all live origins at the expiry point and determines
+ /// if any of them hold the expiring loan. If so, it creates a pending
+ /// warning with the appropriate confidence level based on the liveness
+ /// information. The confidence reflects whether the origin is definitely
+ /// or maybe live at this point.
+ ///
+ /// Note: This implementation considers only the confidence of origin
+ /// liveness. Future enhancements could also consider the confidence of loan
+ /// propagation (e.g., a loan may only be held on some execution paths).
+ void checkExpiry(const ExpireFact *EF) {
+ LoanID ExpiredLoan = EF->getLoanID();
+ LivenessMap Origins = LiveOrigins.getLiveOrigins(EF);
+ Confidence CurConfidence = Confidence::None;
+ const UseFact *BadUse = nullptr;
+ for (auto &[OID, LiveInfo] : Origins) {
+ LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF);
+ if (!HeldLoans.contains(ExpiredLoan))
+ continue;
+ // Loan is defaulted.
+ Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind);
+ if (CurConfidence < NewConfidence) {
+ CurConfidence = NewConfidence;
+ BadUse = LiveInfo.CausingUseFact;
+ }
}
- // If there are no defaulted loans, the use is safe.
- if (DefaultedLoans.empty())
+ if (!BadUse)
return;
-
- // Determine the confidence level of the error (definite or maybe).
- Confidence CurrentConfidence =
- IsDefiniteError ? Confidence::Definite : Confidence::Maybe;
-
- // For each expired loan, create a pending warning.
- for (LoanID DefaultedLoan : DefaultedLoans) {
- // If we already have a warning for this loan with a higher or equal
- // confidence, skip this one.
- if (FinalWarningsMap.count(DefaultedLoan) &&
- CurrentConfidence <= FinalWarningsMap[DefaultedLoan].ConfidenceLevel)
- continue;
-
- auto *EF = AllExpiredLoans.lookup(DefaultedLoan);
- assert(EF && "Could not find ExpireFact for an expired loan.");
-
- FinalWarningsMap[DefaultedLoan] = {/*ExpiryLoc=*/(*EF)->getExpiryLoc(),
- /*UseExpr=*/UF->getUseExpr(),
- /*ConfidenceLevel=*/CurrentConfidence};
+ // We have a use-after-free.
+ Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel;
+ if (LastConf >= CurConfidence)
+ return;
+ FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(),
+ /*UseExpr=*/BadUse->getUseExpr(),
+ /*ConfidenceLevel=*/CurConfidence};
+ }
+
+ static Confidence livenessKindToConfidence(LivenessKind K) {
+ switch (K) {
+ case LivenessKind::Must:
+ return Confidence::Definite;
+ case LivenessKind::Maybe:
+ return Confidence::Maybe;
+ case LivenessKind::Dead:
+ return Confidence::None;
}
+ llvm_unreachable("unknown liveness kind");
}
void issuePendingWarnings() {
@@ -1336,6 +1433,15 @@ public:
// LifetimeSafetyAnalysis Class Implementation
// ========================================================================= //
+/// An object to hold the factories for immutable collections, ensuring
+/// that all created states share the same underlying memory management.
+struct LifetimeFactory {
+ llvm::BumpPtrAllocator Allocator;
+ OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false};
+ LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false};
+ LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false};
+};
+
// We need this here for unique_ptr with forward declared class.
LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default;
@@ -1366,15 +1472,16 @@ void LifetimeSafetyAnalysis::run() {
/// the analysis.
/// 3. Collapse ExpireFacts belonging to same source location into a single
/// Fact.
- LoanPropagation =
- std::make_unique<LoanPropagationAnalysis>(Cfg, AC, *FactMgr, *Factory);
+ LoanPropagation = std::make_unique<LoanPropagationAnalysis>(
+ Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory);
LoanPropagation->run();
- ExpiredLoans =
- std::make_unique<ExpiredLoansAnalysis>(Cfg, AC, *FactMgr, *Factory);
- ExpiredLoans->run();
+ LiveOrigins = std::make_unique<LiveOriginAnalysis>(
+ Cfg, AC, *FactMgr, Factory->LivenessMapFactory);
+ LiveOrigins->run();
+ DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this));
- LifetimeChecker Checker(*LoanPropagation, *ExpiredLoans, *FactMgr, AC,
+ LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC,
Reporter);
Checker.run();
}
@@ -1385,15 +1492,6 @@ LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID,
return LoanPropagation->getLoans(OID, PP);
}
-std::vector<LoanID>
-LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const {
- assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run.");
- std::vector<LoanID> Result;
- for (const auto &pair : ExpiredLoans->getExpiredLoans(PP))
- Result.push_back(pair.first);
- return Result;
-}
-
std::optional<OriginID>
LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const {
assert(FactMgr && "FactManager not initialized");
@@ -1413,6 +1511,15 @@ LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const {
return Result;
}
+std::vector<std::pair<OriginID, LivenessKind>>
+LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const {
+ assert(LiveOrigins && "LiveOriginAnalysis has not been run.");
+ std::vector<std::pair<OriginID, LivenessKind>> Result;
+ for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP))
+ Result.push_back({OID, Info.Kind});
+ return Result;
+}
+
llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const {
assert(FactMgr && "FactManager not initialized");
llvm::StringMap<ProgramPoint> AnnotationToPointMap;
diff --git a/clang/lib/Analysis/LifetimeSafety.md b/clang/lib/Analysis/LifetimeSafety.md
new file mode 100644
index 0000000..3f3d03d
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety.md
@@ -0,0 +1,230 @@
+<Disclaimer: Public document. This work is licensed under the Apache License v2.0 with LLVM Exceptions. See [https://llvm.org/LICENSE.txt](https://llvm.org/LICENSE.txt) for license information.>
+
+# Lifetime Safety: An Intuitive Approach for Temporal Safety in C++
+**Author:**
+[Utkarsh Saxena](mailto:usx@google.com)
+
+**Purpose:** This document serves as a live RFC for a new lifetime safety analysis in C++, with the ultimate goal of publication as a white paper.
+
+## Intended Audience
+
+This document is intended for C++ compiler developers (especially those working on Clang), developers of other systems languages with advanced memory safety models (like Rust and Carbon), and all C++ users interested in writing safer code.
+
+## Goal
+
+* To describe a new lifetime model for C++ that aims to maximize the compile-time detection of temporal memory safety issues.
+* To explore a path toward incremental safety in C++, offering a spectrum of checks that can be adopted without requiring a full plunge into a restrictive ownership model.
+
+**Out of Scope**
+
+* **Rigorous Temporal Memory Safety:** This analysis aims to detect a large class of common errors, but it does not formally prove the absence of all temporal safety bugs.
+* **Runtime Solutions:** This paper focuses exclusively on static, compile-time analysis and does not cover runtime solutions like MTE or AddressSanitizer.
+
+# Paper: C++ Lifetime Safety Analysis
+
+**Subtitle: A Flow-Sensitive, Alias-based Approach to Preventing Dangling Pointers**
+
+## Abstract
+
+This paper introduces a new intra-procedural, flow-sensitive lifetime analysis for C++ implemented in Clang. The analysis is designed to detect a significant class of temporal memory safety violations, such as use-after-free and use-after-return, at compile time. It is based on a model of "Loans" and "Origins," inspired by the Polonius borrow checker in Rust, but adapted for the semantics and flexibility of C++.
+
+The analysis works by translating the Clang CFG into a series of lifetime-relevant "Facts." These facts are then processed by dataflow analyses to precisely determine the validity of pointers and references at each program point. This fact-based approach, combined with a configurable strictness model, allows for both high-confidence error reporting and the detection of more subtle, potential bugs, without requiring extensive new annotations. The ultimate goal is to provide a powerful, low-overhead tool that makes C++ safer by default.
+
+## The Anatomy of a Temporal Safety Error
+
+At its core, a temporal safety error is a bug where an operation is performed on an object at a time when it is no longer valid to do so ([source](http://docs.google.com/document/d/19vbfAiV1yQu3xSMRWjyPUdzyB_LDdVUcKat_HWI1l3g?content_ref=at+its+core+a+temporal+safety+error+is+a+bug+where+an+operation+is+performed+on+an+object+at+a+time+when+it+is+no+longer+valid+to+do+so)). These bugs are notoriously difficult to debug because they often manifest as unpredictable crashes or silent data corruption far from the root cause. However, we can argue that this wide and varied class of errors—from use-after-free to iterator invalidation—stems from a single, canonical pattern.
+
+**Conjecture: Any temporal safety issue is a form of Use-After-Free.**
+
+All sub-categories of temporal safety issues, such as returning a reference to a stack variable (`return-stack-addr`), using a variable after its scope has ended (`use-after-scope`), using heap memory after it has been deleted (`heap-use-after-free`), or using an iterator after its container has been modified (`use-after-invalidation`), can be described by a single sequence of events.
+
+In C++, an *object* is a region of storage, and pointers and references are the mechanisms we use to refer to them. A use-after-free occurs when we access an object after its lifetime has ended. But how can an object be accessed after it has been destroyed? This is only possible through an **alias**—a pointer or reference—that was created while the object was alive and that survived the object's destruction.
+
+This insight allows us to define a canonical use-after-free with four distinct events that happen in a specific order:
+
+1. **`t0`: Creation.** An object `M` is created in some region of storage (on the stack, on the heap, etc.).
+2. **`t1`: Alias Creation.** An alias `P` (a pointer or reference) is created that refers to the object `M`.
+3. **`t2`: End of Lifetime.** The lifetime of object `M` ends (e.g., it is deallocated, or it goes out of scope).
+4. **`t3`: Use of Alias.** The alias `P`, which now dangles, is used to access the memory where `M` once resided.
+
+Let's examine this with a simple piece of C++ code:
+
+```cpp
+void use_after_scope_example() {
+ int* p;
+ {
+ int s = 10; // t0: Object `s` is created on the stack.
+ p = &s; // t1: Alias `p` is made to refer to object `s`.
+ } // t2: The lifetime of `s` ends. `p` now dangles.
+ *p = 42; // t3: The dangling alias `p` is used. This is a use-after-free.
+}
+```
+
+The fundamental problem is that the alias `p` outlived the object `s` it referred to. The challenge for a static analysis is therefore clear: to prevent temporal safety errors, the compiler must be able to track aliases and understand the lifetime of the objects they refer to. It needs to know the "points-to" set for every alias at every point in the program and verify that, at the moment of use, the alias does not point to an object whose lifetime has ended.
+
+This alias-based perspective is powerful because it generalizes beautifully. The "end of lifetime" event at `t2` doesn't have to be a variable going out of scope. It could be:
+
+* A call to `delete`, which ends the lifetime of a heap object.
+* A function `return`, which ends the lifetime of all its local variables.
+* A container modification, like `std::vector::push_back()`, which may reallocate storage, ending the lifetime of the objects in the old buffer and invalidating all existing iterators (aliases).
+
+By focusing on tracking aliases and their validity, we can build a unified model to detect a wide range of temporal safety errors without imposing the heavy "aliasing XOR mutability" restrictions of a traditional borrow checker ([source](https://gist.github.com/nmsmith/cdaa94aa74e8e0611221e65db8e41f7b?content_ref=the+major+advancement+is+to+eliminate+the+aliasing+xor+mutability+restriction+amongst+references+and+replace+it+with+a+similar+restriction+applied+to+lifetime+parameters)). This provides a more intuitive and C++-idiomatic path to memory safety.
+
+## Relation to Thread Safety
+
+This analysis does not address thread safety. Thread safety is concerned with data races that occur across multiple threads. While it is possible to create temporal safety issues in multi-threaded scenarios, this analysis is focused on the sequential lifetime of objects within a single function.
+
+## Quest for Safer Aliasing
+
+Is it possible to achieve memory safety without a restrictive model like Rust's borrow checker? We believe the answer is yes. The key is to shift our focus from *restricting aliases* to *understanding them*. Instead of forbidding programs that have aliased mutable pointers, we can build a model that understands what each pointer can point to at any given time. This approach, similar to the one proposed in P1179 for C++ and explored in modern lifetime systems like Mojo's, allows us to directly detect the root cause of the problem: using a pointer after its target has ceased to exist ([source](http://docs.google.com/document/d/19vbfAiV1yQu3xSMRWjyPUdzyB_LDdVUcKat_HWI1l3g?content_ref=this+approach+similar+to+the+one+proposed+in+p1179+for+c+and+explored+in+modern+lifetime+systems+like+mojo+s+allows+us+to+directly+detect+the+root+cause+of+the+problem+using+a+pointer+after+its+target+has+ceased+to+exist)).
+
+This paper proposes such a model for C++. Let's begin with a simple, yet illustrative, dangling pointer bug:
+
+```cpp
+// Example 1: A simple use-after-free
+void definite_simple_case() {
+ MyObj* p;
+ {
+ MyObj s;
+ p = &s; // 'p' now points to 's'
+ } // 's' is destroyed, 'p' is now dangling
+ (void)*p; // Use-after-free
+}
+```
+
+How can a compiler understand that the use of `p` is an error? It needs to answer a series of questions:
+
+1. What does `p` point to?
+2. When does the object `p` points to cease to be valid?
+3. Is `p` used after that point?
+
+Our model is designed to answer precisely these questions.
+
+## Core Concepts
+
+Our model is built on a few core concepts that allow us to formally track the relationships between pointers and the data they point to.
+
+### Access Paths
+
+An **Access Path** is a symbolic representation of a storage location in the program ([source](https://raw.githubusercontent.com/llvm/llvm-project/0e7c1732a9a7d28549fe5d690083daeb0e5de6b2/clang/lib/Analysis/LifetimeSafety.cpp?content_ref=struct+accesspath+const+clang+valuedecl+d+accesspath+const+clang+valuedecl+d+d+d)). It provides a way to uniquely identify a variable or a sub-object. For now, we will consider simple paths that refer to top-level variables, but the model can be extended to include field accesses (`a.b`), array elements (`a[i]`), and pointer indirections (`p->field`).
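+
+As a rough sketch, the implementation cited above currently represents an access path as nothing more than the declaration of the borrowed variable; supporting sub-object paths would extend this struct with a list of path components (hypothetical, not yet implemented):
+
+```cpp
+// Sketch of the current representation: an access path is simply the
+// declaration of the borrowed variable. Extending to `a.b`, `a[i]`, or
+// `p->field` would add a path-component list.
+struct AccessPath {
+  const clang::ValueDecl *D;
+
+  AccessPath(const clang::ValueDecl *D) : D(D) {}
+};
+```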
+
+### Loans: The Act of Borrowing
+
+A **Loan** is created whenever a reference or pointer to an object is created. It represents the act of "borrowing" that object's storage location ([source](https://raw.githubusercontent.com/llvm/llvm-project/0e7c1732a9a7d28549fe5d690083daeb0e5de6b2/clang/lib/Analysis/LifetimeSafety.cpp)). Each loan is associated with a unique ID and the `AccessPath` of the object being borrowed.
+
+In our `definite_simple_case` example, the expression `&s` creates a loan. The `AccessPath` for this loan is the variable `s`.
+
+### Origins: The Provenance of a Pointer
+
+An **Origin** is a symbolic identifier that represents the *set of possible loans* a pointer-like object could hold at any given time ([source](http://docs.google.com/document/d/1JpJ3M9yeXX-BnC4oKXBvRWzxoFrwziN1RzI4DrMrSp8)). Every pointer-like variable or expression in the program is associated with an origin.
+
+* A variable declaration like `MyObj* p` introduces an origin for `p`.
+* An expression like `&s` also has an origin.
+* The number of origins grows with the complexity of the pointer's type. For example:
+ * `int* p;` has a single origin.
+ * `int** p;` has two origins: one for the outer pointer and one for the inner pointer. This allows us to distinguish between `p` itself being modified and what `*p` points to being modified.
+ * `struct S { int* p; };` also has an origin associated with the member `p`.
+
+The central goal of our analysis is to determine, for each origin at each point in the program, which loans it might contain.
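+
+As a concrete sketch (the `Origin(...)` and `Loan(...)` comments below are this document's notation, not compiler output), the two origins of an `int**` let the analysis track reassignments through the inner pointer independently:
+
+```cpp
+void two_origins() {
+  int x = 0, y = 0;
+  int* p = &x;   // Origin(p) = {Loan(x)}
+  int** pp = &p; // Outer origin of pp = {Loan(p)};
+                 // the inner origin tracks Origin(p)
+  *pp = &y;      // Only the inner origin changes:
+                 // Origin(p) is now {Loan(y)}
+  (void)**pp;    // Reads 'y'; 'x' is no longer reachable through 'pp'
+}
+```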
+
+## Subtyping Rules and Subset Constraints
+
+The relationships between origins are established through the program's semantics, particularly assignments. When a pointer is assigned to another, as in `p = q`, the set of loans that `q` holds must be a subset of the loans that `p` can now hold. This is a fundamental subtyping rule: for `T*'a` to be a subtype of `T*'b`, the set of loans represented by `'a` must be a subset of the loans represented by `'b`.
+
+This leads to the concept of **subset constraints**. An assignment `p = q` generates a constraint `Origin(q) ⊆ Origin(p)`. The analysis doesn't solve these as a global system of equations. Instead, as we will see, it propagates the *consequences* of these constraints—the loans themselves—through the control-flow graph. This is a key departure from the Polonius model, which focuses on propagating the constraints (`'a: 'b`) themselves.
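+
+A minimal sketch of these constraints in action, again using the `Origin`/`Loan` notation in comments:
+
+```cpp
+void subset_constraints(bool cond) {
+  int a = 0, b = 0;
+  int* q = &a; // Origin(q) = {Loan(a)}
+  if (cond)
+    q = &b;    // Origin(q) = {Loan(b)} on this branch
+  // At the merge point: Origin(q) = {Loan(a), Loan(b)}
+  int* p = q;  // Generates Origin(q) ⊆ Origin(p), so Origin(p)
+               // now contains {Loan(a), Loan(b)} as well
+}
+```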
+
+## Invalidations: When Loans Expire
+
+A loan expires when the object it refers to is no longer valid. In our model, this is an **invalidation** event. The most common invalidation is deallocation, which in C++ can mean:
+* A stack variable going out of scope.
+* A `delete` call on a heap-allocated object.
+* A container modification that reallocates its internal storage.
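+
+The reallocation case looks like an ordinary use-after-free to the model: the loan on the old buffer simply expires at the mutating call. A sketch:
+
+```cpp
+#include <vector>
+
+void reallocation_invalidates() {
+  std::vector<int> v = {1, 2, 3};
+  int* p = &v[0]; // Loan on v's current buffer
+  v.push_back(4); // May reallocate: the loan expires here
+  (void)*p;       // Potential use-after-free
+}
+```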
+
+## An Event-Based Representation of the Function
+
+To analyze a function, we first transform its CFG into a sequence of atomic, lifetime-relevant **Events**, which we call **Facts**. These facts abstract away the complexities of C++ syntax and provide a clean input for our analysis. The main facts are:
+
+* `Issue(LoanID, OriginID)`: A new loan is created. For example, `&s` generates an `Issue` fact.
+* `Expire(LoanID)`: A loan expires. This is generated at the end of a variable's scope.
+* `OriginFlow(Dest, Src, Kill)`: Loans from a source origin flow to a destination origin, as in an assignment. `Kill` indicates whether the destination's old loans are cleared.
+* `Use(OriginID)`: An origin is used, such as in a pointer dereference.
+
+Let's trace our `definite_simple_case` example with these facts:
+
+```cpp
+void definite_simple_case() {
+ MyObj* p; // Origin for p is O_p
+ {
+ MyObj s;
+ // The expression `&s` generates:
+ // - IssueFact(L1, O_&s) (A new loan L1 on 's' is created)
+ // The assignment `p = &s` generates:
+ // - OriginFlowFact(O_p, O_&s, Kill=true)
+ p = &s;
+ } // The end of the scope for 's' generates:
+ // - ExpireFact(L1)
+ // The dereference `*p` generates:
+ // - UseFact(O_p)
+ (void)*p;
+}
+```
+
+## Flow-Sensitive Lifetime Policy
+
+With the program represented as a stream of facts, we can now define a flow-sensitive policy to answer our three core questions. We do this by maintaining a map from `Origin` to `Set<Loan>` at each program point. This map represents the state of our analysis.
+
+The analysis proceeds as follows:
+1. **Forward Propagation of Loans:** We perform a forward dataflow analysis.
+ * When we encounter an `Issue` fact, we add the new loan to its origin's loan set.
+ * When we see an `OriginFlow` fact, we update the destination origin's loan set with the loans from the source.
+ * At control-flow merge points, we take the *union* of the loan sets from all incoming branches.
+
+2. **Backward Propagation of Liveness:** We then perform a backward dataflow analysis, starting from `Use` facts.
+ * A `Use` of an origin marks it as "live."
+ * This liveness information is propagated backward. If an origin `O_p` is live, and it received its loans from `O_q`, then `O_q` is also considered live at that point.
+
+3. **Error Detection:** An error is flagged when the analysis determines that a **live** origin contains a loan that has **expired**.
+
+In our `definite_simple_case` example:
+* The forward analysis determines that at the point of use, `Origin(p)` contains `Loan(s)`.
+* The backward analysis determines that at the point where `s` is destroyed, `Origin(p)` is live.
+* The `ExpireFact` for `Loan(s)` occurs before the `UseFact`.
+* The combination of these three conditions triggers a use-after-free error.
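+
+Because merge points take the union of loan sets, the same machinery also flags *potential* errors where only one path dangles. A sketch:
+
+```cpp
+void potential_case(bool cond) {
+  MyObj safe;
+  MyObj* p = &safe; // Origin(p) = {Loan(safe)}
+  {
+    MyObj temp;
+    if (cond)
+      p = &temp;    // Origin(p) = {Loan(temp)} on this branch
+    // Merge: Origin(p) = {Loan(safe), Loan(temp)}
+  }                 // ExpireFact(Loan(temp))
+  (void)*p;         // 'p' is live and may hold an expired loan:
+                    // reported as a potential use-after-free
+}
+```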
+
+## Without Functions, Our Work is Done Here!
+
+The model described so far works perfectly for a single, monolithic function. However, the moment we introduce function calls, the problem becomes more complex. How do we reason about lifetimes across function boundaries, especially when we can't see the implementation of the called function?
+
+### Effects of a Function Call
+
+A function call has inputs and outputs. From a lifetime perspective, the key challenge is to understand how the lifetimes of the outputs relate to the lifetimes of the inputs.
+
+### Outlives Constraints and Placeholder Origins
+
+When analyzing a function like `const char* get_prefix(const string& s, int len)`, we don't know the specific lifetime of the `s` that will be passed by the caller. To handle this, we introduce **placeholder origins** for the input parameters. These placeholders act as variables in our analysis.
+
+If a function returns a pointer or reference, its lifetime must be tied to one of its inputs. This is an **outlives constraint**. For example, the input `s` must outlive the return value of `get_prefix`. In our model, this means the origin of the return value will contain the placeholder loan associated with `s`.
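+
+A sketch of how the placeholder flows through `get_prefix` (the placeholder loan `L_s` is notation, not syntax):
+
+```cpp
+#include <string>
+
+// On entry, the placeholder origin of 's' holds a placeholder loan L_s.
+const char* get_prefix(const std::string& s, int len) {
+  const char* p = s.data(); // Origin(p) = {L_s}
+  (void)len;
+  return p; // The return origin carries L_s, recording that 's'
+            // must outlive the returned pointer.
+}
+```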
+
+### Opaque Functions
+
+What if a function's implementation is not visible (e.g., it's in a separate translation unit), and it has no lifetime annotations? In this case, we must be conservative. If we pass a pointer to an opaque function, we have to assume that the object it points to might have been invalidated. Our model handles this by associating a special **OPAQUE loan** with the pointer after the call, signifying that its lifetime is now unknown.
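+
+A sketch of this conservative treatment (the `OPAQUE` loan is a concept of this model, not an implemented attribute):
+
+```cpp
+void opaque(int** pp); // Defined elsewhere; no lifetime annotations.
+
+void call_opaque() {
+  int x = 0;
+  int* p = &x; // Origin(p) = {Loan(x)}
+  opaque(&p);  // 'p' may have been redirected or its target freed:
+               // Origin(p) = {OPAQUE} after the call
+  (void)*p;    // Must be treated as potentially dangling
+}
+```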
+
+## Why a Borrow Checker is Not the Right Fit for C++
+
+The "aliasing XOR mutability" rule, while powerful, is fundamentally at odds with many idiomatic C++ patterns.
+* **Observer Patterns:** It's common to have multiple non-owning pointers observing a mutable object.
+* **Intrusive Data Structures:** Data structures like intrusive linked lists require objects to hold pointers to one another, creating cycles that are difficult for a traditional borrow checker to handle.
+* **Iterator Invalidation:** The core problem in C++ is often not aliasing itself, but the fact that a mutation can invalidate an alias (e.g., resizing a vector). An alias-based analysis, like the one proposed here, directly models this problem, whereas a borrow checker can feel like an indirect and overly restrictive solution.
+
+By focusing on tracking what pointers can point to, our model avoids rejecting these safe and useful patterns, making it a more natural fit for the existing C++ ecosystem.
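+
+For instance, the following observer-style aliasing is rejected under "aliasing XOR mutability", yet no loan ever expires, so an alias-tracking model accepts it (`MyObj` here is illustrative):
+
+```cpp
+struct MyObj {
+  int value = 0;
+};
+
+void observers() {
+  MyObj subject;
+  MyObj* observer1 = &subject; // Two mutable aliases coexist...
+  MyObj* observer2 = &subject;
+  subject.value = 42;          // ...with direct mutation of the object.
+  (void)observer1->value;      // Safe: Loan(subject) has not expired.
+  (void)observer2->value;
+}
+```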
+
+## Open Questions
+
+* **When, if ever, should we introduce the term "lifetime"?** The term "lifetime" is heavily associated with Rust's model, so this paper has intentionally focused on "Origins" and "Loans" to avoid confusion. Is there a point where introducing "lifetime" would be helpful, or should we stick to the new terminology?
+* **Syntax for Annotations:** While this model is designed to work with minimal annotations, some will be necessary for complex cases. What should the syntax for these annotations look like? Can we build on existing attributes like `[[clang::lifetimebound]]`?
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index dc3778b..2b89370 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -537,33 +537,16 @@ WarningsSpecialCaseList::create(const llvm::MemoryBuffer &Input,
}
void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) {
- // Drop the default section introduced by special case list, we only support
- // exact diagnostic group names.
- // FIXME: We should make this configurable in the parser instead.
- // FIXME: C++20 can use std::erase_if(Sections, [](Section &sec) { return
- // sec.SectionStr == "*"; });
- llvm::erase_if(Sections, [](Section &sec) { return sec.SectionStr == "*"; });
- // Make sure we iterate sections by their line numbers.
- std::vector<std::pair<unsigned, const Section *>> LineAndSectionEntry;
- LineAndSectionEntry.reserve(Sections.size());
- for (const auto &Entry : Sections) {
- StringRef DiagName = Entry.SectionStr;
- // Each section has a matcher with that section's name, attached to that
- // line.
- const auto &DiagSectionMatcher = Entry.SectionMatcher;
- unsigned DiagLine = 0;
- for (const auto &Glob : DiagSectionMatcher->Globs)
- if (Glob->Name == DiagName) {
- DiagLine = Glob->LineNo;
- break;
- }
- LineAndSectionEntry.emplace_back(DiagLine, &Entry);
- }
- llvm::sort(LineAndSectionEntry);
static constexpr auto WarningFlavor = clang::diag::Flavor::WarningOrError;
- for (const auto &[_, SectionEntry] : LineAndSectionEntry) {
+ for (const auto &SectionEntry : Sections) {
+ StringRef DiagGroup = SectionEntry.SectionStr;
+ if (DiagGroup == "*") {
+      // Drop the default section introduced by the special case list; we only
+      // support exact diagnostic group names.
+ // FIXME: We should make this configurable in the parser instead.
+ continue;
+ }
SmallVector<diag::kind> GroupDiags;
- StringRef DiagGroup = SectionEntry->SectionStr;
if (Diags.getDiagnosticIDs()->getDiagnosticsInGroup(
WarningFlavor, DiagGroup, GroupDiags)) {
StringRef Suggestion =
@@ -576,7 +559,7 @@ void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) {
for (diag::kind Diag : GroupDiags)
// We're intentionally overwriting any previous mappings here to make sure
// latest one takes precedence.
- DiagToSection[Diag] = SectionEntry;
+ DiagToSection[Diag] = &SectionEntry;
}
}
diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
index f7bc1d5..a1dc4a7 100644
--- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp
+++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
@@ -38,11 +38,11 @@ SanitizerSpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
}
void SanitizerSpecialCaseList::createSanitizerSections() {
- for (auto &S : Sections) {
+ for (const auto &S : Sections) {
SanitizerMask Mask;
#define SANITIZER(NAME, ID) \
- if (S.SectionMatcher->match(NAME)) \
+ if (S.SectionMatcher.match(NAME)) \
Mask |= SanitizerKind::ID;
#define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID)
@@ -50,7 +50,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() {
#undef SANITIZER
#undef SANITIZER_GROUP
- SanitizerSections.emplace_back(Mask, S.Entries, S.FileIdx);
+ SanitizerSections.emplace_back(Mask, S);
}
}
@@ -66,10 +66,9 @@ SanitizerSpecialCaseList::inSectionBlame(SanitizerMask Mask, StringRef Prefix,
StringRef Category) const {
for (const auto &S : llvm::reverse(SanitizerSections)) {
if (S.Mask & Mask) {
- unsigned LineNum =
- SpecialCaseList::inSectionBlame(S.Entries, Prefix, Query, Category);
+ unsigned LineNum = S.S.getLastMatch(Prefix, Query, Category);
if (LineNum > 0)
- return {S.FileIdx, LineNum};
+ return {S.S.FileIdx, LineNum};
}
}
return NotFound;
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 552698a..dfcc7940 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -319,9 +319,12 @@ public:
Opts["__opencl_c_images"] = true;
Opts["__opencl_c_3d_image_writes"] = true;
Opts["cl_khr_3d_image_writes"] = true;
+ Opts["__opencl_c_program_scope_global_variables"] = true;
- Opts["__opencl_c_generic_address_space"] =
- GPUKind >= llvm::AMDGPU::GK_GFX700;
+ if (GPUKind >= llvm::AMDGPU::GK_GFX700) {
+ Opts["__opencl_c_generic_address_space"] = true;
+ Opts["__opencl_c_device_enqueue"] = true;
+ }
}
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
index 5f1faab..df42af8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp
@@ -15,6 +15,7 @@
#include "CIRGenFunction.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/AST/GlobalDecl.h"
using namespace clang;
@@ -75,3 +76,20 @@ void CIRGenCXXABI::setCXXABIThisValue(CIRGenFunction &cgf,
assert(getThisDecl(cgf) && "no 'this' variable for function");
cgf.cxxabiThisValue = thisPtr;
}
+
+CharUnits CIRGenCXXABI::getArrayCookieSize(const CXXNewExpr *e) {
+ if (!requiresArrayCookie(e))
+ return CharUnits::Zero();
+
+ cgm.errorNYI(e->getSourceRange(), "CIRGenCXXABI::getArrayCookieSize");
+ return CharUnits::Zero();
+}
+
+bool CIRGenCXXABI::requiresArrayCookie(const CXXNewExpr *e) {
+ // If the class's usual deallocation function takes two arguments,
+ // it needs a cookie.
+ if (e->doesUsualArrayDeleteWantSize())
+ return true;
+
+ return e->getAllocatedType().isDestructedType();
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index 1dee774..2465a68 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -28,6 +28,8 @@ protected:
CIRGenModule &cgm;
std::unique_ptr<clang::MangleContext> mangleContext;
+ virtual bool requiresArrayCookie(const CXXNewExpr *e);
+
public:
// TODO(cir): make this protected when target-specific CIRGenCXXABIs are
// implemented.
@@ -113,6 +115,7 @@ public:
CIRGenFunction &cgf) = 0;
virtual void emitRethrow(CIRGenFunction &cgf, bool isNoReturn) = 0;
+ virtual void emitThrow(CIRGenFunction &cgf, const CXXThrowExpr *e) = 0;
virtual mlir::Attribute getAddrOfRTTIDescriptor(mlir::Location loc,
QualType ty) = 0;
@@ -244,6 +247,19 @@ public:
void setStructorImplicitParamValue(CIRGenFunction &cgf, mlir::Value val) {
cgf.cxxStructorImplicitParamValue = val;
}
+
+ /**************************** Array cookies ******************************/
+
+ /// Returns the extra size required in order to store the array
+ /// cookie for the given new-expression. May return 0 to indicate that no
+ /// array cookie is required.
+ ///
+ /// Several cases are filtered out before this method is called:
+ /// - non-array allocations never need a cookie
+ /// - calls to \::operator new(size_t, void*) never need a cookie
+ ///
+  /// \param e - the new-expression being allocated.
+ virtual CharUnits getArrayCookieSize(const CXXNewExpr *e);
};
 /// Creates an Itanium-family ABI
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index 9d12a13..8f4377b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -690,7 +690,7 @@ void CIRGenFunction::emitCXXAggrConstructorCall(
// every temporary created in a default argument expression is sequenced
// before the construction of the next array element, if any.
{
- assert(!cir::MissingFeatures::runCleanupsScope());
+ RunCleanupsScope scope(*this);
// Evaluate the constructor and its arguments in a regular
// partial-destroy cleanup.
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index 4d4d10b..8700697 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -28,6 +28,12 @@ using namespace clang::CIRGen;
// CIRGenFunction cleanup related
//===----------------------------------------------------------------------===//
+/// Emits all the code to cause the given temporary to be cleaned up.
+void CIRGenFunction::emitCXXTemporary(const CXXTemporary *temporary,
+ QualType tempType, Address ptr) {
+ pushDestroy(NormalAndEHCleanup, ptr, tempType, destroyCXXObject);
+}
+
//===----------------------------------------------------------------------===//
// EHScopeStack
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.h b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
index a4ec8cc..30f5607 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.h
@@ -104,6 +104,7 @@ public:
bool isNormalCleanup() const { return cleanupBits.isNormalCleanup; }
bool isActive() const { return cleanupBits.isActive; }
+ void setActive(bool isActive) { cleanupBits.isActive = isActive; }
size_t getCleanupSize() const { return cleanupBits.cleanupSize; }
void *getCleanupBuffer() { return this + 1; }
@@ -138,5 +139,13 @@ inline EHScopeStack::iterator EHScopeStack::begin() const {
return iterator(startOfData);
}
+inline EHScopeStack::iterator
+EHScopeStack::find(stable_iterator savePoint) const {
+ assert(savePoint.isValid() && "finding invalid savepoint");
+ assert(savePoint.size <= stable_begin().size &&
+ "finding savepoint after pop");
+ return iterator(endOfBuffer - savePoint.size);
+}
+
} // namespace clang::CIRGen
#endif // CLANG_LIB_CIR_CODEGEN_CIRGENCLEANUP_H
diff --git a/clang/lib/CIR/CodeGen/CIRGenException.cpp b/clang/lib/CIR/CodeGen/CIRGenException.cpp
index 7fcb39a..6453843 100644
--- a/clang/lib/CIR/CodeGen/CIRGenException.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenException.cpp
@@ -31,11 +31,36 @@ void CIRGenFunction::emitCXXThrowExpr(const CXXThrowExpr *e) {
if (throwType->isObjCObjectPointerType()) {
cgm.errorNYI("emitCXXThrowExpr ObjCObjectPointerType");
return;
- } else {
- cgm.errorNYI("emitCXXThrowExpr with subExpr");
- return;
}
- } else {
- cgm.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
+
+ cgm.getCXXABI().emitThrow(*this, e);
+ return;
}
+
+ cgm.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
+}
+
+void CIRGenFunction::emitAnyExprToExn(const Expr *e, Address addr) {
+ // Make sure the exception object is cleaned up if there's an
+ // exception during initialization.
+ assert(!cir::MissingFeatures::ehCleanupScope());
+
+ // __cxa_allocate_exception returns a void*; we need to cast this
+ // to the appropriate type for the object.
+ mlir::Type ty = convertTypeForMem(e->getType());
+ Address typedAddr = addr.withElementType(builder, ty);
+
+ // From LLVM's codegen:
+ // FIXME: this isn't quite right! If there's a final unelided call
+ // to a copy constructor, then according to [except.terminate]p1 we
+ // must call std::terminate() if that constructor throws, because
+ // technically that copy occurs after the exception expression is
+ // evaluated but before the exception is caught. But the best way
+ // to handle that is to teach EmitAggExpr to do the final copy
+ // differently if it can't be elided.
+ emitAnyExprToMem(e, typedAddr, e->getType().getQualifiers(),
+ /*isInitializer=*/true);
+
+ // Deactivate the cleanup block.
+ assert(!cir::MissingFeatures::ehCleanupScope());
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
index 60ccf18..901b937 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp
@@ -46,6 +46,12 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
return dest;
}
+ void ensureDest(mlir::Location loc, QualType ty) {
+ if (!dest.isIgnored())
+ return;
+ dest = cgf.createAggTemp(ty, loc, "agg.tmp.ensured");
+ }
+
public:
AggExprEmitter(CIRGenFunction &cgf, AggValueSlot dest)
: cgf(cgf), dest(dest) {}
@@ -96,10 +102,22 @@ public:
Visit(die->getExpr());
}
void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *e) {
- assert(!cir::MissingFeatures::aggValueSlotDestructedFlag());
+ // Ensure that we have a slot, but if we already do, remember
+ // whether it was externally destructed.
+ bool wasExternallyDestructed = dest.isExternallyDestructed();
+ ensureDest(cgf.getLoc(e->getSourceRange()), e->getType());
+
+ // We're going to push a destructor if there isn't already one.
+ dest.setExternallyDestructed();
+
Visit(e->getSubExpr());
+
+ // Push that destructor we promised.
+ if (!wasExternallyDestructed)
+ cgf.emitCXXTemporary(e->getTemporary(), e->getType(), dest.getAddress());
}
void VisitLambdaExpr(LambdaExpr *e);
+ void VisitExprWithCleanups(ExprWithCleanups *e);
// Stubs -- These should be moved up when they are implemented.
void VisitCastExpr(CastExpr *e) {
@@ -241,11 +259,6 @@ public:
cgf.cgm.errorNYI(e->getSourceRange(),
"AggExprEmitter: VisitCXXStdInitializerListExpr");
}
-
- void VisitExprWithCleanups(ExprWithCleanups *e) {
- cgf.cgm.errorNYI(e->getSourceRange(),
- "AggExprEmitter: VisitExprWithCleanups");
- }
void VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *e) {
cgf.cgm.errorNYI(e->getSourceRange(),
"AggExprEmitter: VisitCXXScalarValueInitExpr");
@@ -588,6 +601,11 @@ void AggExprEmitter::VisitLambdaExpr(LambdaExpr *e) {
}
}
+void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *e) {
+ CIRGenFunction::RunCleanupsScope cleanups(cgf);
+ Visit(e->getSubExpr());
+}
+
void AggExprEmitter::VisitCallExpr(const CallExpr *e) {
if (e->getCallReturnType(cgf.getContext())->isReferenceType()) {
cgf.cgm.errorNYI(e->getSourceRange(), "reference return type");
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 7989ad2..4eb8ca8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CIRGenCXXABI.h"
+#include "CIRGenConstantEmitter.h"
#include "CIRGenFunction.h"
#include "clang/AST/DeclCXX.h"
@@ -210,6 +211,19 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall(
return emitCall(fnInfo, callee, returnValue, args, nullptr, loc);
}
+static CharUnits calculateCookiePadding(CIRGenFunction &cgf,
+ const CXXNewExpr *e) {
+ if (!e->isArray())
+ return CharUnits::Zero();
+
+ // No cookie is required if the operator new[] being used is the
+ // reserved placement operator new[].
+ if (e->getOperatorNew()->isReservedGlobalPlacementOperator())
+ return CharUnits::Zero();
+
+ return cgf.cgm.getCXXABI().getArrayCookieSize(e);
+}
+
static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
unsigned minElements,
mlir::Value &numElements,
@@ -224,8 +238,98 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
return sizeWithoutCookie;
}
- cgf.cgm.errorNYI(e->getSourceRange(), "emitCXXNewAllocSize: array");
- return {};
+ // The width of size_t.
+ unsigned sizeWidth = cgf.cgm.getDataLayout().getTypeSizeInBits(cgf.SizeTy);
+
+  // The number of elements can have an arbitrary integer type;
+ // essentially, we need to multiply it by a constant factor, add a
+ // cookie size, and verify that the result is representable as a
+ // size_t. That's just a gloss, though, and it's wrong in one
+ // important way: if the count is negative, it's an error even if
+ // the cookie size would bring the total size >= 0.
+ //
+ // If the array size is constant, Sema will have prevented negative
+ // values and size overflow.
+
+ // Compute the constant factor.
+ llvm::APInt arraySizeMultiplier(sizeWidth, 1);
+ while (const ConstantArrayType *cat =
+ cgf.getContext().getAsConstantArrayType(type)) {
+ type = cat->getElementType();
+ arraySizeMultiplier *= cat->getSize();
+ }
+
+ CharUnits typeSize = cgf.getContext().getTypeSizeInChars(type);
+ llvm::APInt typeSizeMultiplier(sizeWidth, typeSize.getQuantity());
+ typeSizeMultiplier *= arraySizeMultiplier;
+
+ // Figure out the cookie size.
+ llvm::APInt cookieSize(sizeWidth,
+ calculateCookiePadding(cgf, e).getQuantity());
+
+ // This will be a size_t.
+ mlir::Value size;
+
+ // Emit the array size expression.
+ // We multiply the size of all dimensions for NumElements.
+  // e.g. for 'int[2][3]', ElemType is 'int' and NumElements is 6.
+ const Expr *arraySize = *e->getArraySize();
+ mlir::Attribute constNumElements =
+ ConstantEmitter(cgf.cgm, &cgf)
+ .emitAbstract(arraySize, arraySize->getType());
+ if (constNumElements) {
+ // Get an APInt from the constant
+ const llvm::APInt &count =
+ mlir::cast<cir::IntAttr>(constNumElements).getValue();
+
+ unsigned numElementsWidth = count.getBitWidth();
+
+ // The equivalent code in CodeGen/CGExprCXX.cpp handles these cases as
+ // overflow, but that should never happen. The size argument is implicitly
+ // cast to a size_t, so it can never be negative and numElementsWidth will
+ // always equal sizeWidth.
+ assert(!count.isNegative() && "Expected non-negative array size");
+ assert(numElementsWidth == sizeWidth &&
+ "Expected a size_t array size constant");
+
+ // Okay, compute a count at the right width.
+ llvm::APInt adjustedCount = count.zextOrTrunc(sizeWidth);
+
+ // Scale numElements by that. This might overflow, but we don't
+ // care because it only overflows if allocationSize does too, and
+ // if that overflows then we shouldn't use this.
+ // This emits a constant that may not be used, but we can't tell here
+ // whether it will be needed or not.
+ numElements =
+ cgf.getBuilder().getConstInt(loc, adjustedCount * arraySizeMultiplier);
+
+ // Compute the size before cookie, and track whether it overflowed.
+ bool overflow;
+ llvm::APInt allocationSize =
+ adjustedCount.umul_ov(typeSizeMultiplier, overflow);
+
+ // Sema prevents us from hitting this case
+ assert(!overflow && "Overflow in array allocation size");
+
+ // Add in the cookie, and check whether it's overflowed.
+ if (cookieSize != 0) {
+ cgf.cgm.errorNYI(e->getSourceRange(),
+ "emitCXXNewAllocSize: array cookie");
+ }
+
+ size = cgf.getBuilder().getConstInt(loc, allocationSize);
+ } else {
+ // TODO: Handle the variable size case
+ cgf.cgm.errorNYI(e->getSourceRange(),
+ "emitCXXNewAllocSize: variable array size");
+ }
+
+ if (cookieSize == 0)
+ sizeWithoutCookie = size;
+ else
+ assert(sizeWithoutCookie && "didn't set sizeWithoutCookie?");
+
+ return size;
}
static void storeAnyExprIntoOneUnit(CIRGenFunction &cgf, const Expr *init,
@@ -254,13 +358,26 @@ static void storeAnyExprIntoOneUnit(CIRGenFunction &cgf, const Expr *init,
llvm_unreachable("bad evaluation kind");
}
+void CIRGenFunction::emitNewArrayInitializer(
+ const CXXNewExpr *e, QualType elementType, mlir::Type elementTy,
+ Address beginPtr, mlir::Value numElements,
+ mlir::Value allocSizeWithoutCookie) {
+ // If we have a type with trivial initialization and no initializer,
+ // there's nothing to do.
+ if (!e->hasInitializer())
+ return;
+
+ cgm.errorNYI(e->getSourceRange(), "emitNewArrayInitializer");
+}
+
static void emitNewInitializer(CIRGenFunction &cgf, const CXXNewExpr *e,
QualType elementType, mlir::Type elementTy,
Address newPtr, mlir::Value numElements,
mlir::Value allocSizeWithoutCookie) {
assert(!cir::MissingFeatures::generateDebugInfo());
if (e->isArray()) {
- cgf.cgm.errorNYI(e->getSourceRange(), "emitNewInitializer: array");
+ cgf.emitNewArrayInitializer(e, elementType, elementTy, newPtr, numElements,
+ allocSizeWithoutCookie);
} else if (const Expr *init = e->getInitializer()) {
storeAnyExprIntoOneUnit(cgf, init, e->getAllocatedType(), newPtr,
AggValueSlot::DoesNotOverlap);
@@ -536,7 +653,14 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
if (allocSize != allocSizeWithoutCookie)
cgm.errorNYI(e->getSourceRange(), "emitCXXNewExpr: array with cookies");
- mlir::Type elementTy = convertTypeForMem(allocType);
+ mlir::Type elementTy;
+ if (e->isArray()) {
+ // For array new, use the allocated type to handle multidimensional arrays
+ // correctly
+ elementTy = convertTypeForMem(e->getAllocatedType());
+ } else {
+ elementTy = convertTypeForMem(allocType);
+ }
Address result = builder.createElementBitCast(getLoc(e->getSourceRange()),
allocation, elementTy);
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index e20a4fc..59aa257 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -118,6 +118,9 @@ class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils {
/// non-packed LLVM struct will give the correct layout.
bool naturalLayout = true;
+ bool split(size_t index, CharUnits hint);
+ std::optional<size_t> splitAt(CharUnits pos);
+
static mlir::Attribute buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
CharUnits startOffset, CharUnits size,
bool naturalLayout, mlir::Type desiredTy,
@@ -137,6 +140,10 @@ public:
/// Update or overwrite the bits starting at \p offsetInBits with \p bits.
bool addBits(llvm::APInt bits, uint64_t offsetInBits, bool allowOverwrite);
+ /// Attempt to condense the value starting at \p offset to a constant of type
+ /// \p desiredTy.
+ void condense(CharUnits offset, mlir::Type desiredTy);
+
/// Produce a constant representing the entire accumulated value, ideally of
/// the specified type. If \p allowOversized, the constant might be larger
/// than implied by \p desiredTy (eg, if there is a flexible array member).
@@ -176,6 +183,195 @@ bool ConstantAggregateBuilder::add(mlir::TypedAttr typedAttr, CharUnits offset,
return false;
}
+bool ConstantAggregateBuilder::addBits(llvm::APInt bits, uint64_t offsetInBits,
+ bool allowOverwrite) {
+ const ASTContext &astContext = cgm.getASTContext();
+ const uint64_t charWidth = astContext.getCharWidth();
+ mlir::Type charTy = cgm.getBuilder().getUIntNTy(charWidth);
+
+ // Offset of where we want the first bit to go within the bits of the
+ // current char.
+ unsigned offsetWithinChar = offsetInBits % charWidth;
+
+ // We split bit-fields up into individual bytes. Walk over the bytes and
+ // update them.
+ for (CharUnits offsetInChars =
+ astContext.toCharUnitsFromBits(offsetInBits - offsetWithinChar);
+ /**/; ++offsetInChars) {
+ // Number of bits we want to fill in this char.
+ unsigned wantedBits =
+ std::min((uint64_t)bits.getBitWidth(), charWidth - offsetWithinChar);
+
+ // Get a char containing the bits we want in the right places. The other
+ // bits have unspecified values.
+ llvm::APInt bitsThisChar = bits;
+ if (bitsThisChar.getBitWidth() < charWidth)
+ bitsThisChar = bitsThisChar.zext(charWidth);
+ if (cgm.getDataLayout().isBigEndian()) {
+ // Figure out how much to shift by. We may need to left-shift if we have
+ // less than one byte of Bits left.
+ int shift = bits.getBitWidth() - charWidth + offsetWithinChar;
+ if (shift > 0)
+ bitsThisChar.lshrInPlace(shift);
+ else if (shift < 0)
+ bitsThisChar = bitsThisChar.shl(-shift);
+ } else {
+ bitsThisChar = bitsThisChar.shl(offsetWithinChar);
+ }
+ if (bitsThisChar.getBitWidth() > charWidth)
+ bitsThisChar = bitsThisChar.trunc(charWidth);
+
+ if (wantedBits == charWidth) {
+ // Got a full byte: just add it directly.
+ add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+ allowOverwrite);
+ } else {
+ // Partial byte: update the existing integer if there is one. If we
+ // can't split out a 1-CharUnit range to update, then we can't add
+ // these bits and fail the entire constant emission.
+ std::optional<size_t> firstElemToUpdate = splitAt(offsetInChars);
+ if (!firstElemToUpdate)
+ return false;
+ std::optional<size_t> lastElemToUpdate =
+ splitAt(offsetInChars + CharUnits::One());
+ if (!lastElemToUpdate)
+ return false;
+ assert(*lastElemToUpdate - *firstElemToUpdate < 2 &&
+ "should have at most one element covering one byte");
+
+ // Figure out which bits we want and discard the rest.
+ llvm::APInt updateMask(charWidth, 0);
+ if (cgm.getDataLayout().isBigEndian())
+ updateMask.setBits(charWidth - offsetWithinChar - wantedBits,
+ charWidth - offsetWithinChar);
+ else
+ updateMask.setBits(offsetWithinChar, offsetWithinChar + wantedBits);
+ bitsThisChar &= updateMask;
+ bool isNull = false;
+ if (*firstElemToUpdate < elements.size()) {
+ auto firstEltToUpdate =
+ mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+ isNull = firstEltToUpdate && firstEltToUpdate.isNullValue();
+ }
+
+ if (*firstElemToUpdate == *lastElemToUpdate || isNull) {
+ // All existing bits are either zero or undef.
+ add(cir::IntAttr::get(charTy, bitsThisChar), offsetInChars,
+ /*allowOverwrite*/ true);
+ } else {
+ cir::IntAttr ci =
+ mlir::dyn_cast<cir::IntAttr>(elements[*firstElemToUpdate].element);
+ // In order to perform a partial update, we need the existing bitwise
+ // value, which we can only extract for a constant int.
+ if (!ci)
+ return false;
+ // Because this is a 1-CharUnit range, the constant occupying it must
+ // be exactly one CharUnit wide.
+ assert(ci.getBitWidth() == charWidth && "splitAt failed");
+ assert((!(ci.getValue() & updateMask) || allowOverwrite) &&
+ "unexpectedly overwriting bitfield");
+ bitsThisChar |= (ci.getValue() & ~updateMask);
+ elements[*firstElemToUpdate].element =
+ cir::IntAttr::get(charTy, bitsThisChar);
+ }
+ }
+
+ // Stop if we've added all the bits.
+ if (wantedBits == bits.getBitWidth())
+ break;
+
+ // Remove the consumed bits from Bits.
+ if (!cgm.getDataLayout().isBigEndian())
+ bits.lshrInPlace(wantedBits);
+ bits = bits.trunc(bits.getBitWidth() - wantedBits);
+
+ // The remaining bits go at the start of the following bytes.
+ offsetWithinChar = 0;
+ }
+
+ return true;
+}
+
+/// Returns a position within elements such that all elements
+/// before the returned index end before pos and all elements at or after
+/// the returned index begin at or after pos. Splits elements as necessary
+/// to ensure this. Returns std::nullopt if we find something we can't split.
+std::optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits pos) {
+ if (pos >= size)
+ return elements.size();
+
+ while (true) {
+ // Find the first element that starts after pos.
+ Element *iter =
+ llvm::upper_bound(elements, pos, [](CharUnits pos, const Element &elt) {
+ return pos < elt.offset;
+ });
+
+ if (iter == elements.begin())
+ return 0;
+
+ size_t index = iter - elements.begin() - 1;
+ const Element &elt = elements[index];
+
+ // If we already have an element starting at pos, we're done.
+ if (elt.offset == pos)
+ return index;
+
+ // Check for overlap with the element that starts before pos.
+ CharUnits eltEnd = elt.offset + getSize(elt.element);
+ if (eltEnd <= pos)
+ return index + 1;
+
+ // Try to decompose it into smaller constants.
+ if (!split(index, pos))
+ return std::nullopt;
+ }
+}
+
+/// Split the constant at index, if possible. Return true if we did.
+/// Hint indicates the location at which we'd like to split, but may be
+/// ignored.
+bool ConstantAggregateBuilder::split(size_t index, CharUnits hint) {
+ cgm.errorNYI("split constant at index");
+ return false;
+}
+
+void ConstantAggregateBuilder::condense(CharUnits offset,
+ mlir::Type desiredTy) {
+ CharUnits desiredSize = getSize(desiredTy);
+
+ std::optional<size_t> firstElemToReplace = splitAt(offset);
+ if (!firstElemToReplace)
+ return;
+ size_t first = *firstElemToReplace;
+
+ std::optional<size_t> lastElemToReplace = splitAt(offset + desiredSize);
+ if (!lastElemToReplace)
+ return;
+ size_t last = *lastElemToReplace;
+
+ size_t length = last - first;
+ if (length == 0)
+ return;
+
+ if (length == 1 && elements[first].offset == offset &&
+ getSize(elements[first].element) == desiredSize) {
+ cgm.errorNYI("re-wrapping single element records");
+ return;
+ }
+
+ // Build a new constant from the elements in the range.
+ SmallVector<Element> subElems(elements.begin() + first,
+ elements.begin() + last);
+ mlir::Attribute replacement =
+ buildFrom(cgm, subElems, offset, desiredSize,
+ /*naturalLayout=*/false, desiredTy, false);
+
+ // Replace the range with the condensed constant.
+ Element newElt(mlir::cast<mlir::TypedAttr>(replacement), offset);
+ replace(elements, first, last, {newElt});
+}
+
mlir::Attribute
ConstantAggregateBuilder::buildFrom(CIRGenModule &cgm, ArrayRef<Element> elems,
CharUnits startOffset, CharUnits size,
@@ -301,6 +497,9 @@ private:
bool appendBytes(CharUnits fieldOffsetInChars, mlir::TypedAttr initCst,
bool allowOverwrite = false);
+ bool appendBitField(const FieldDecl *field, uint64_t fieldOffset,
+ cir::IntAttr ci, bool allowOverwrite = false);
+
bool build(InitListExpr *ile, bool allowOverwrite);
bool build(const APValue &val, const RecordDecl *rd, bool isPrimaryBase,
const CXXRecordDecl *vTableClass, CharUnits baseOffset);
@@ -325,6 +524,30 @@ bool ConstRecordBuilder::appendBytes(CharUnits fieldOffsetInChars,
return builder.add(initCst, startOffset + fieldOffsetInChars, allowOverwrite);
}
+bool ConstRecordBuilder::appendBitField(const FieldDecl *field,
+ uint64_t fieldOffset, cir::IntAttr ci,
+ bool allowOverwrite) {
+ const CIRGenRecordLayout &rl =
+ cgm.getTypes().getCIRGenRecordLayout(field->getParent());
+ const CIRGenBitFieldInfo &info = rl.getBitFieldInfo(field);
+ llvm::APInt fieldValue = ci.getValue();
+
+ // Promote the size of FieldValue if necessary
+ // FIXME: This should never occur, but currently it can because initializer
+ // constants are cast to bool, and because clang is not enforcing bitfield
+ // width limits.
+ if (info.size > fieldValue.getBitWidth())
+ fieldValue = fieldValue.zext(info.size);
+
+ // Truncate the size of FieldValue to the bit field size.
+ if (info.size < fieldValue.getBitWidth())
+ fieldValue = fieldValue.trunc(info.size);
+
+ return builder.addBits(fieldValue,
+ cgm.getASTContext().toBits(startOffset) + fieldOffset,
+ allowOverwrite);
+}
+
bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
RecordDecl *rd = ile->getType()
->castAs<clang::RecordType>()
@@ -407,12 +630,14 @@ bool ConstRecordBuilder::build(InitListExpr *ile, bool allowOverwrite) {
} else {
// Otherwise we have a bitfield.
if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
- assert(!cir::MissingFeatures::bitfields());
- cgm.errorNYI(field->getSourceRange(), "bitfields");
+ if (!appendBitField(field, layout.getFieldOffset(index), constInt,
+ allowOverwrite))
+ return false;
+ } else {
+ // We are trying to initialize a bitfield with a non-trivial constant,
+ // this must require run-time code.
+ return false;
}
- // We are trying to initialize a bitfield with a non-trivial constant,
- // this must require run-time code.
- return false;
}
}
@@ -510,8 +735,16 @@ bool ConstRecordBuilder::build(const APValue &val, const RecordDecl *rd,
if (field->hasAttr<NoUniqueAddressAttr>())
allowOverwrite = true;
} else {
- assert(!cir::MissingFeatures::bitfields());
- cgm.errorNYI(field->getSourceRange(), "bitfields");
+ // Otherwise we have a bitfield.
+ if (auto constInt = dyn_cast<cir::IntAttr>(eltInit)) {
+ if (!appendBitField(field, layout.getFieldOffset(index) + offsetBits,
+ constInt, allowOverwrite))
+ return false;
+ } else {
+ // We are trying to initialize a bitfield with a non-trivial constant,
+ // this must require run-time code.
+ return false;
+ }
}
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 768d75d..5d3496a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1099,15 +1099,17 @@ public:
CIRGenFunction::LexicalScope lexScope{cgf, loc,
b.getInsertionBlock()};
cgf.curLexScope->setAsTernary();
- b.create<cir::YieldOp>(loc, cgf.evaluateExprAsBool(e->getRHS()));
+ mlir::Value res = cgf.evaluateExprAsBool(e->getRHS());
+ lexScope.forceCleanup();
+ cir::YieldOp::create(b, loc, res);
},
/*falseBuilder*/
[&](mlir::OpBuilder &b, mlir::Location loc) {
CIRGenFunction::LexicalScope lexScope{cgf, loc,
b.getInsertionBlock()};
cgf.curLexScope->setAsTernary();
- auto res = b.create<cir::ConstantOp>(loc, builder.getFalseAttr());
- b.create<cir::YieldOp>(loc, res.getRes());
+ auto res = cir::ConstantOp::create(b, loc, builder.getFalseAttr());
+ cir::YieldOp::create(b, loc, res.getRes());
});
return maybePromoteBoolResult(resOp.getResult(), resTy);
}
@@ -1143,15 +1145,17 @@ public:
CIRGenFunction::LexicalScope lexScope{cgf, loc,
b.getInsertionBlock()};
cgf.curLexScope->setAsTernary();
- auto res = b.create<cir::ConstantOp>(loc, builder.getTrueAttr());
- b.create<cir::YieldOp>(loc, res.getRes());
+ auto res = cir::ConstantOp::create(b, loc, builder.getTrueAttr());
+ cir::YieldOp::create(b, loc, res.getRes());
},
/*falseBuilder*/
[&](mlir::OpBuilder &b, mlir::Location loc) {
CIRGenFunction::LexicalScope lexScope{cgf, loc,
b.getInsertionBlock()};
cgf.curLexScope->setAsTernary();
- b.create<cir::YieldOp>(loc, cgf.evaluateExprAsBool(e->getRHS()));
+ mlir::Value res = cgf.evaluateExprAsBool(e->getRHS());
+ lexScope.forceCleanup();
+ cir::YieldOp::create(b, loc, res);
});
return maybePromoteBoolResult(resOp.getResult(), resTy);
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index dfd9d2c..a60efe1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1090,6 +1090,8 @@ public:
/// even if no aggregate location is provided.
RValue emitAnyExprToTemp(const clang::Expr *e);
+ void emitAnyExprToExn(const Expr *e, Address addr);
+
void emitArrayDestroy(mlir::Value begin, mlir::Value numElements,
QualType elementType, CharUnits elementAlign,
Destroyer *destroyer);
@@ -1252,12 +1254,20 @@ public:
mlir::Value emitCXXNewExpr(const CXXNewExpr *e);
+ void emitNewArrayInitializer(const CXXNewExpr *E, QualType ElementType,
+ mlir::Type ElementTy, Address BeginPtr,
+ mlir::Value NumElements,
+ mlir::Value AllocSizeWithoutCookie);
+
RValue emitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *e,
const CXXMethodDecl *md,
ReturnValueSlot returnValue);
RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr);
+ void emitCXXTemporary(const CXXTemporary *temporary, QualType tempType,
+ Address ptr);
+
void emitCXXThrowExpr(const CXXThrowExpr *e);
void emitCtorPrologue(const clang::CXXConstructorDecl *ctor,
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index debea8af..0418174 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -70,6 +70,7 @@ public:
QualType thisTy) override;
void emitRethrow(CIRGenFunction &cgf, bool isNoReturn) override;
+ void emitThrow(CIRGenFunction &cgf, const CXXThrowExpr *e) override;
bool useThunkForDtorVariant(const CXXDestructorDecl *dtor,
CXXDtorType dt) const override {
@@ -1544,6 +1545,59 @@ void CIRGenItaniumCXXABI::emitRethrow(CIRGenFunction &cgf, bool isNoReturn) {
}
}
+void CIRGenItaniumCXXABI::emitThrow(CIRGenFunction &cgf,
+ const CXXThrowExpr *e) {
+  // This differs a bit from LLVM codegen: CIR has native operations for some
+  // cxa functions, defers allocation size computation, always passes the dtor
+  // symbol, etc. CIRGen also does not use getAllocateExceptionFn / getThrowFn.
+
+ // Now allocate the exception object.
+ CIRGenBuilderTy &builder = cgf.getBuilder();
+ QualType clangThrowType = e->getSubExpr()->getType();
+ cir::PointerType throwTy =
+ builder.getPointerTo(cgf.convertType(clangThrowType));
+ uint64_t typeSize =
+ cgf.getContext().getTypeSizeInChars(clangThrowType).getQuantity();
+ mlir::Location subExprLoc = cgf.getLoc(e->getSubExpr()->getSourceRange());
+
+ // Defer computing allocation size to some later lowering pass.
+ mlir::TypedValue<cir::PointerType> exceptionPtr =
+ cir::AllocExceptionOp::create(builder, subExprLoc, throwTy,
+ builder.getI64IntegerAttr(typeSize))
+ .getAddr();
+
+ // Build expression and store its result into exceptionPtr.
+ CharUnits exnAlign = cgf.getContext().getExnObjectAlignment();
+ cgf.emitAnyExprToExn(e->getSubExpr(), Address(exceptionPtr, exnAlign));
+
+ // Get the RTTI symbol address.
+ auto typeInfo = mlir::cast<cir::GlobalViewAttr>(
+ cgm.getAddrOfRTTIDescriptor(subExprLoc, clangThrowType,
+ /*forEH=*/true));
+ assert(!typeInfo.getIndices() && "expected no indirection");
+
+ // The address of the destructor.
+ //
+ // Note: LLVM codegen already optimizes out the dtor if the
+ // type is a record with trivial dtor (by passing down a
+ // null dtor). In CIR, we forward this info and allow for
+ // Lowering pass to skip passing the trivial function.
+ //
+ if (const RecordType *recordTy = clangThrowType->getAs<RecordType>()) {
+ CXXRecordDecl *rec =
+ cast<CXXRecordDecl>(recordTy->getOriginalDecl()->getDefinition());
+ assert(!cir::MissingFeatures::isTrivialCtorOrDtor());
+ if (!rec->hasTrivialDestructor()) {
+ cgm.errorNYI("emitThrow: non-trivial destructor");
+ return;
+ }
+ }
+
+ // Now throw the exception.
+ mlir::Location loc = cgf.getLoc(e->getSourceRange());
+ insertThrowAndSplit(builder, loc, exceptionPtr, typeInfo.getSymbol());
+}
+
CIRGenCXXABI *clang::CIRGen::CreateCIRGenItaniumCXXABI(CIRGenModule &cgm) {
switch (cgm.getASTContext().getCXXABIKind()) {
case TargetCXXABI::GenericItanium:
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h
index ea8625a..25b6ecb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -371,6 +371,13 @@ public:
mayOverlap, isZeroed);
}
+ IsDestructed_t isExternallyDestructed() const {
+ return IsDestructed_t(destructedFlag);
+ }
+ void setExternallyDestructed(bool destructed = true) {
+ destructedFlag = destructed;
+ }
+
clang::Qualifiers getQualifiers() const { return quals; }
Address getAddress() const { return addr; }
diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h
index c87a6ef..66c1f76 100644
--- a/clang/lib/CIR/CodeGen/EHScopeStack.h
+++ b/clang/lib/CIR/CodeGen/EHScopeStack.h
@@ -175,6 +175,10 @@ public:
return stable_iterator(endOfBuffer - startOfData);
}
+  /// Turn a stable reference to a scope depth into an unstable pointer
+ /// to the EH stack.
+ iterator find(stable_iterator savePoint) const;
+
/// Create a stable reference to the bottom of the EH stack.
static stable_iterator stable_end() { return stable_iterator(0); }
};
diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
index 3484c59..64ac970 100644
--- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
@@ -473,6 +473,49 @@ LogicalResult cir::VTableAttr::verify(
}
//===----------------------------------------------------------------------===//
+// DynamicCastInfoAttr definitions
+//===----------------------------------------------------------------------===//
+
+std::string DynamicCastInfoAttr::getAlias() const {
+ // The alias looks like: `dyn_cast_info_<src>_<dest>`
+
+ std::string alias = "dyn_cast_info_";
+
+ alias.append(getSrcRtti().getSymbol().getValue());
+ alias.push_back('_');
+ alias.append(getDestRtti().getSymbol().getValue());
+
+ return alias;
+}
+
+LogicalResult DynamicCastInfoAttr::verify(
+ function_ref<InFlightDiagnostic()> emitError, cir::GlobalViewAttr srcRtti,
+ cir::GlobalViewAttr destRtti, mlir::FlatSymbolRefAttr runtimeFunc,
+ mlir::FlatSymbolRefAttr badCastFunc, cir::IntAttr offsetHint) {
+ auto isRttiPtr = [](mlir::Type ty) {
+ // RTTI pointers are !cir.ptr<!u8i>.
+
+ auto ptrTy = mlir::dyn_cast<cir::PointerType>(ty);
+ if (!ptrTy)
+ return false;
+
+ auto pointeeIntTy = mlir::dyn_cast<cir::IntType>(ptrTy.getPointee());
+ if (!pointeeIntTy)
+ return false;
+
+ return pointeeIntTy.isUnsigned() && pointeeIntTy.getWidth() == 8;
+ };
+
+ if (!isRttiPtr(srcRtti.getType()))
+ return emitError() << "srcRtti must be an RTTI pointer";
+
+ if (!isRttiPtr(destRtti.getType()))
+ return emitError() << "destRtti must be an RTTI pointer";
+
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
// CIR Dialect
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index cdd4e3c..5f88590 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -71,6 +71,10 @@ struct CIROpAsmDialectInterface : public OpAsmDialectInterface {
os << "bfi_" << bitfield.getName().str();
return AliasResult::FinalAlias;
}
+ if (auto dynCastInfoAttr = mlir::dyn_cast<cir::DynamicCastInfoAttr>(attr)) {
+ os << dynCastInfoAttr.getAlias();
+ return AliasResult::FinalAlias;
+ }
return AliasResult::NoAlias;
}
};
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index c15637d..2eeef81 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -8,18 +8,39 @@
#include "PassDetail.h"
#include "clang/AST/ASTContext.h"
+#include "clang/Basic/Module.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/Dialect/Passes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/Path.h"
#include <memory>
using namespace mlir;
using namespace cir;
+static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) {
+ SmallString<128> fileName;
+
+ if (mlirModule.getSymName())
+ fileName = llvm::sys::path::filename(mlirModule.getSymName()->str());
+
+ if (fileName.empty())
+ fileName = "<null>";
+
+ for (size_t i = 0; i < fileName.size(); ++i) {
+ // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+ // to be the set of C preprocessing numbers.
+ if (!clang::isPreprocessingNumberBody(fileName[i]))
+ fileName[i] = '_';
+ }
+
+ return fileName;
+}
+
namespace {
struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
LoweringPreparePass() = default;
@@ -30,9 +51,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
void lowerComplexDivOp(cir::ComplexDivOp op);
void lowerComplexMulOp(cir::ComplexMulOp op);
void lowerUnaryOp(cir::UnaryOp op);
+ void lowerGlobalOp(cir::GlobalOp op);
void lowerArrayDtor(cir::ArrayDtor op);
void lowerArrayCtor(cir::ArrayCtor op);
+ /// Build the function that initializes the specified global
+ cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op);
+
+ /// Build a module init function that calls all the dynamic initializers.
+ void buildCXXGlobalInitFunc();
+
cir::FuncOp buildRuntimeFunction(
mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
cir::FuncType type,
@@ -47,6 +75,10 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
/// Tracks current module.
mlir::ModuleOp mlirModule;
+ /// Tracks existing dynamic initializers.
+ llvm::StringMap<uint32_t> dynamicInitializerNames;
+ llvm::SmallVector<cir::FuncOp> dynamicInitializers;
+
void setASTContext(clang::ASTContext *c) { astCtx = c; }
};
@@ -589,6 +621,111 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
op.erase();
}
+cir::FuncOp
+LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) {
+ // TODO(cir): Store this in the GlobalOp.
+ // This should come from the MangleContext, but for now I'm hardcoding it.
+ SmallString<256> fnName("__cxx_global_var_init");
+ // Get a unique name
+ uint32_t cnt = dynamicInitializerNames[fnName]++;
+ if (cnt)
+ fnName += "." + llvm::Twine(cnt).str();
+
+ // Create a variable initialization function.
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointAfter(op);
+ auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+ FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
+ cir::GlobalLinkageKind::InternalLinkage);
+
+  // Move over the initialization code of the ctor region.
+ mlir::Block *entryBB = f.addEntryBlock();
+ if (!op.getCtorRegion().empty()) {
+ mlir::Block &block = op.getCtorRegion().front();
+ entryBB->getOperations().splice(entryBB->begin(), block.getOperations(),
+ block.begin(), std::prev(block.end()));
+ }
+
+ // Register the destructor call with __cxa_atexit
+ mlir::Region &dtorRegion = op.getDtorRegion();
+ if (!dtorRegion.empty()) {
+ assert(!cir::MissingFeatures::opGlobalDtorLowering());
+ llvm_unreachable("dtor region lowering is NYI");
+ }
+
+ // Replace cir.yield with cir.return
+ builder.setInsertionPointToEnd(entryBB);
+ mlir::Operation *yieldOp = nullptr;
+ if (!op.getCtorRegion().empty()) {
+ mlir::Block &block = op.getCtorRegion().front();
+ yieldOp = &block.getOperations().back();
+ } else {
+ assert(!cir::MissingFeatures::opGlobalDtorLowering());
+ llvm_unreachable("dtor region lowering is NYI");
+ }
+
+ assert(isa<YieldOp>(*yieldOp));
+ cir::ReturnOp::create(builder, yieldOp->getLoc());
+ return f;
+}
+
+void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
+ mlir::Region &ctorRegion = op.getCtorRegion();
+ mlir::Region &dtorRegion = op.getDtorRegion();
+
+ if (!ctorRegion.empty() || !dtorRegion.empty()) {
+    // Build a variable initialization function and move the initialization code
+ // in the ctor region over.
+ cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
+
+ // Clear the ctor and dtor region
+ ctorRegion.getBlocks().clear();
+ dtorRegion.getBlocks().clear();
+
+ assert(!cir::MissingFeatures::astVarDeclInterface());
+ dynamicInitializers.push_back(f);
+ }
+
+ assert(!cir::MissingFeatures::opGlobalAnnotations());
+}
+
+void LoweringPreparePass::buildCXXGlobalInitFunc() {
+ if (dynamicInitializers.empty())
+ return;
+
+ assert(!cir::MissingFeatures::opGlobalCtorList());
+
+ SmallString<256> fnName;
+ // Include the filename in the symbol name. Including "sub_" matches gcc
+ // and makes sure these symbols appear lexicographically behind the symbols
+ // with priority (TBD). Module implementation units behave the same
+ // way as a non-modular TU with imports.
+ // TODO: check CXX20ModuleInits
+ if (astCtx->getCurrentNamedModule() &&
+ !astCtx->getCurrentNamedModule()->isModuleImplementation()) {
+ llvm::raw_svector_ostream out(fnName);
+ std::unique_ptr<clang::MangleContext> mangleCtx(
+ astCtx->createMangleContext());
+ cast<clang::ItaniumMangleContext>(*mangleCtx)
+ .mangleModuleInitializer(astCtx->getCurrentNamedModule(), out);
+ } else {
+ fnName += "_GLOBAL__sub_I_";
+ fnName += getTransformedFileName(mlirModule);
+ }
+
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back());
+ auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+ cir::FuncOp f =
+ buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType,
+ cir::GlobalLinkageKind::ExternalLinkage);
+ builder.setInsertionPointToStart(f.addEntryBlock());
+ for (cir::FuncOp &f : dynamicInitializers)
+ builder.createCallOp(f.getLoc(), f, {});
+
+ cir::ReturnOp::create(builder, f.getLoc());
+}
+
static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
clang::ASTContext *astCtx,
mlir::Operation *op, mlir::Type eltTy,
@@ -691,6 +828,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
lowerComplexDivOp(complexDiv);
else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op))
lowerComplexMulOp(complexMul);
+ else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op))
+ lowerGlobalOp(glob);
else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op))
lowerUnaryOp(unary);
}
@@ -704,12 +843,15 @@ void LoweringPreparePass::runOnOperation() {
op->walk([&](mlir::Operation *op) {
if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
- cir::ComplexMulOp, cir::ComplexDivOp, cir::UnaryOp>(op))
+ cir::ComplexMulOp, cir::ComplexDivOp, cir::GlobalOp,
+ cir::UnaryOp>(op))
opsToTransform.push_back(op);
});
for (mlir::Operation *o : opsToTransform)
runOnOp(o);
+
+ buildCXXGlobalInitFunc();
}
std::unique_ptr<Pass> mlir::createLoweringPreparePass() {
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 1ff8cc5..e9649af 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1058,7 +1058,7 @@ mlir::LogicalResult CIRToLLVMPtrStrideOpLowering::matchAndRewrite(
const mlir::Type resultTy = tc->convertType(ptrStrideOp.getType());
mlir::Type elementTy =
- convertTypeForMemory(*tc, dataLayout, ptrStrideOp.getElementTy());
+ convertTypeForMemory(*tc, dataLayout, ptrStrideOp.getElementType());
mlir::MLIRContext *ctx = elementTy.getContext();
// void and function types don't really have a layout to use in GEPs,
@@ -2581,22 +2581,69 @@ void createLLVMFuncOpIfNotExist(mlir::ConversionPatternRewriter &rewriter,
mlir::LogicalResult CIRToLLVMThrowOpLowering::matchAndRewrite(
cir::ThrowOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
- if (op.rethrows()) {
- auto voidTy = mlir::LLVM::LLVMVoidType::get(getContext());
- auto funcTy =
- mlir::LLVM::LLVMFunctionType::get(getContext(), voidTy, {}, false);
+ mlir::Location loc = op.getLoc();
+ auto voidTy = mlir::LLVM::LLVMVoidType::get(getContext());
- auto mlirModule = op->getParentOfType<mlir::ModuleOp>();
- rewriter.setInsertionPointToStart(&mlirModule.getBodyRegion().front());
+ if (op.rethrows()) {
+ auto funcTy = mlir::LLVM::LLVMFunctionType::get(voidTy, {});
+ // Get or create `declare void @__cxa_rethrow()`
const llvm::StringRef functionName = "__cxa_rethrow";
createLLVMFuncOpIfNotExist(rewriter, op, functionName, funcTy);
- rewriter.setInsertionPointAfter(op.getOperation());
- rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
- op, mlir::TypeRange{}, functionName, mlir::ValueRange{});
+ auto cxaRethrow = mlir::LLVM::CallOp::create(
+ rewriter, loc, mlir::TypeRange{}, functionName);
+
+ rewriter.replaceOp(op, cxaRethrow);
+ return mlir::success();
}
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+ auto fnTy = mlir::LLVM::LLVMFunctionType::get(
+ voidTy, {llvmPtrTy, llvmPtrTy, llvmPtrTy});
+
+ // Get or create `declare void @__cxa_throw(ptr, ptr, ptr)`
+ const llvm::StringRef fnName = "__cxa_throw";
+ createLLVMFuncOpIfNotExist(rewriter, op, fnName, fnTy);
+
+ mlir::Value typeInfo = mlir::LLVM::AddressOfOp::create(
+ rewriter, loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()),
+ adaptor.getTypeInfoAttr());
+
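+ // __cxa_throw takes an optional destructor pointer; pass a null pointer
+ // when the throw carries no destructor.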
+ mlir::Value dtor;
+ if (op.getDtor()) {
+ dtor = mlir::LLVM::AddressOfOp::create(rewriter, loc, llvmPtrTy,
+ adaptor.getDtorAttr());
+ } else {
+ dtor = mlir::LLVM::ZeroOp::create(rewriter, loc, llvmPtrTy);
+ }
+
+ auto cxaThrowCall = mlir::LLVM::CallOp::create(
+ rewriter, loc, mlir::TypeRange{}, fnName,
+ mlir::ValueRange{adaptor.getExceptionPtr(), typeInfo, dtor});
+
+ rewriter.replaceOp(op, cxaThrowCall);
+ return mlir::success();
+}
+
+mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite(
+ cir::AllocExceptionOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ // Get or create `declare ptr @__cxa_allocate_exception(i64)`
+ StringRef fnName = "__cxa_allocate_exception";
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext());
+ auto int64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
+ auto fnTy = mlir::LLVM::LLVMFunctionType::get(llvmPtrTy, {int64Ty});
+
+ createLLVMFuncOpIfNotExist(rewriter, op, fnName, fnTy);
+ auto exceptionSize = mlir::LLVM::ConstantOp::create(rewriter, op.getLoc(),
+ adaptor.getSizeAttr());
+
+ auto allocaExceptionCall = mlir::LLVM::CallOp::create(
+ rewriter, op.getLoc(), mlir::TypeRange{llvmPtrTy}, fnName,
+ mlir::ValueRange{exceptionSize});
+
+ rewriter.replaceOp(op, allocaExceptionCall);
return mlir::success();
}
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947..a071e80 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1272,6 +1272,23 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound,
EmitCheck(std::make_pair(Check, CheckKind), CheckHandler, StaticData, Index);
}
+void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, QualType AllocType) {
+ assert(SanOpts.has(SanitizerKind::AllocToken) &&
+ "Only needed with -fsanitize=alloc-token");
+
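+ // Render the allocated type as a fully qualified name with tag keywords
+ // suppressed.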
+ PrintingPolicy Policy(CGM.getContext().getLangOpts());
+ Policy.SuppressTagKeyword = true;
+ Policy.FullyQualifiedName = true;
+ SmallString<64> TypeName;
+ llvm::raw_svector_ostream TypeNameOS(TypeName);
+ AllocType.getCanonicalType().print(TypeNameOS, Policy);
+ auto *TypeMDS = llvm::MDString::get(CGM.getLLVMContext(), TypeNameOS.str());
+
+ // Format: !{<type-name>}
+ auto *MDN = llvm::MDNode::get(CGM.getLLVMContext(), {TypeMDS});
+ CB->setMetadata(llvm::LLVMContext::MD_alloc_token, MDN);
+}
+
CodeGenFunction::ComplexPairTy CodeGenFunction::
EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
@@ -6784,29 +6801,26 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) {
return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV;
}
-void CodeGenFunction::FlattenAccessAndType(
- Address Addr, QualType AddrType,
- SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList,
- SmallVectorImpl<QualType> &FlatTypes) {
- // WorkList is list of type we are processing + the Index List to access
- // the field of that type in Addr for use in a GEP
- llvm::SmallVector<std::pair<QualType, llvm::SmallVector<llvm::Value *, 4>>,
- 16>
+void CodeGenFunction::FlattenAccessAndTypeLValue(
+ LValue Val, SmallVectorImpl<LValue> &AccessList) {
+
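+ // Each work-list entry holds the lvalue being flattened, the type still to
+ // process, and the GEP index list used to address that type within it.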
+ llvm::SmallVector<
+ std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16>
WorkList;
llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32);
- // Addr should be a pointer so we need to 'dereference' it
- WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}});
+ WorkList.push_back({Val, Val.getType(), {llvm::ConstantInt::get(IdxTy, 0)}});
while (!WorkList.empty()) {
- auto [T, IdxList] = WorkList.pop_back_val();
+ auto [LVal, T, IdxList] = WorkList.pop_back_val();
T = T.getCanonicalType().getUnqualifiedType();
assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL");
+
if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) {
uint64_t Size = CAT->getZExtSize();
for (int64_t I = Size - 1; I > -1; I--) {
llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
- WorkList.emplace_back(CAT->getElementType(), IdxListCopy);
+ WorkList.emplace_back(LVal, CAT->getElementType(), IdxListCopy);
}
} else if (const auto *RT = dyn_cast<RecordType>(T)) {
const RecordDecl *Record = RT->getOriginalDecl()->getDefinitionOrSelf();
@@ -6814,44 +6828,75 @@ void CodeGenFunction::FlattenAccessAndType(
const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(Record);
- llvm::SmallVector<QualType, 16> FieldTypes;
+ llvm::SmallVector<
+ std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16>
+ ReverseList;
if (CXXD && CXXD->isStandardLayout())
Record = CXXD->getStandardLayoutBaseWithFields();
// deal with potential base classes
if (CXXD && !CXXD->isStandardLayout()) {
- for (auto &Base : CXXD->bases())
- FieldTypes.push_back(Base.getType());
+ if (CXXD->getNumBases() > 0) {
+ assert(CXXD->getNumBases() == 1 &&
+ "HLSL doesn't support multiple inheritance.");
+ auto Base = CXXD->bases_begin();
+ llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+ IdxListCopy.push_back(llvm::ConstantInt::get(
+ IdxTy, 0)); // base struct should be at index zero
+ ReverseList.emplace_back(LVal, Base->getType(), IdxListCopy);
+ }
}
- for (auto *FD : Record->fields())
- FieldTypes.push_back(FD->getType());
+ const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record);
- for (int64_t I = FieldTypes.size() - 1; I > -1; I--) {
- llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
- IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
- WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy});
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ LValue RLValue;
+ bool createdGEP = false;
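+ // Bitfields cannot be addressed with a GEP, so materialize an lvalue for
+ // the enclosing record once and emit field lvalues from it; other fields
+ // just extend the GEP index list.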
+ for (auto *FD : Record->fields()) {
+ if (FD->isBitField()) {
+ if (FD->isUnnamedBitField())
+ continue;
+ if (!createdGEP) {
+ createdGEP = true;
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList,
+ LLVMT, Align, "gep");
+ RLValue = MakeAddrLValue(GEP, T);
+ }
+ LValue FieldLVal = EmitLValueForField(RLValue, FD, true);
+ ReverseList.push_back({FieldLVal, FD->getType(), {}});
+ } else {
+ llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+ IdxListCopy.push_back(
+ llvm::ConstantInt::get(IdxTy, Layout.getLLVMFieldNo(FD)));
+ ReverseList.emplace_back(LVal, FD->getType(), IdxListCopy);
+ }
}
+
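+ // Reverse so the stack-like work list pops fields in declaration order.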
+ std::reverse(ReverseList.begin(), ReverseList.end());
+ llvm::append_range(WorkList, ReverseList);
} else if (const auto *VT = dyn_cast<VectorType>(T)) {
llvm::Type *LLVMT = ConvertTypeForMem(T);
CharUnits Align = getContext().getTypeAlignInChars(T);
- Address GEP =
- Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep");
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT,
+ Align, "vector.gep");
+ LValue Base = MakeAddrLValue(GEP, T);
for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) {
- llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I);
- // gep on vector fields is not recommended so combine gep with
- // extract/insert
- AccessList.emplace_back(GEP, Idx);
- FlatTypes.push_back(VT->getElementType());
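+ // GEPs into vectors are discouraged, so represent each lane as a
+ // vector-element lvalue and go through extract/insert on load and store.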
+ llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I);
+ LValue LV =
+ LValue::MakeVectorElt(Base.getAddress(), Idx, VT->getElementType(),
+ Base.getBaseInfo(), TBAAAccessInfo());
+ AccessList.emplace_back(LV);
}
- } else {
- // a scalar/builtin type
- llvm::Type *LLVMT = ConvertTypeForMem(T);
- CharUnits Align = getContext().getTypeAlignInChars(T);
- Address GEP =
- Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep");
- AccessList.emplace_back(GEP, nullptr);
- FlatTypes.push_back(T);
+ } else { // a scalar/builtin type
+ if (!IdxList.empty()) {
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList,
+ LLVMT, Align, "gep");
+ AccessList.emplace_back(MakeAddrLValue(GEP, T));
+ } else // must be a bitfield we already created an lvalue for
+ AccessList.emplace_back(LVal);
}
}
}
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index b8150a2..07b9aeb 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -488,100 +488,62 @@ static bool isTrivialFiller(Expr *E) {
return false;
}
-static void EmitHLSLAggregateSplatCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, llvm::Value *SrcVal,
- QualType SrcTy, SourceLocation Loc) {
+// Emit an elementwise cast where the RHS is a scalar or vector,
+// or emit an aggregate splat cast.
+static void EmitHLSLScalarElementwiseAndSplatCasts(CodeGenFunction &CGF,
+ LValue DestVal,
+ llvm::Value *SrcVal,
+ QualType SrcTy,
+ SourceLocation Loc) {
// Flatten our destination
- SmallVector<QualType> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
-
- assert(SrcTy->isScalarType() && "Invalid HLSL Aggregate splat cast.");
- for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; ++I) {
- llvm::Value *Cast =
- CGF.EmitScalarConversion(SrcVal, SrcTy, DestTypes[I], Loc);
-
- // store back
- llvm::Value *Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
- }
-}
-
-// emit a flat cast where the RHS is a scalar, including vector
-static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, llvm::Value *SrcVal,
- QualType SrcTy, SourceLocation Loc) {
- // Flatten our destination
- SmallVector<QualType, 16> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
-
- assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting.");
- const VectorType *VT = SrcTy->getAs<VectorType>();
- SrcTy = VT->getElementType();
- assert(StoreGEPList.size() <= VT->getNumElements() &&
- "Cannot perform HLSL flat cast when vector source \
- object has less elements than flattened destination \
- object.");
- for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) {
- llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load");
+ SmallVector<LValue, 16> StoreList;
+ CGF.FlattenAccessAndTypeLValue(DestVal, StoreList);
+
+ bool isVector = false;
+ if (auto *VT = SrcTy->getAs<VectorType>()) {
+ isVector = true;
+ SrcTy = VT->getElementType();
+ assert(StoreList.size() <= VT->getNumElements() &&
+ "Cannot perform HLSL flat cast when vector source \
+ object has less elements than flattened destination \
+ object.");
+ }
+
+ for (unsigned I = 0, Size = StoreList.size(); I < Size; I++) {
+ LValue DestLVal = StoreList[I];
+ llvm::Value *Load =
+ isVector ? CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load")
+ : SrcVal;
llvm::Value *Cast =
- CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc);
-
- // store back
- llvm::Value *Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+ CGF.EmitScalarConversion(Load, SrcTy, DestLVal.getType(), Loc);
+ CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal);
}
}
// emit a flat cast where the RHS is an aggregate
-static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, Address SrcVal,
- QualType SrcTy, SourceLocation Loc) {
+static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue DestVal,
+ LValue SrcVal, SourceLocation Loc) {
// Flatten our destination
- SmallVector<QualType, 16> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
+ SmallVector<LValue, 16> StoreList;
+ CGF.FlattenAccessAndTypeLValue(DestVal, StoreList);
// Flatten our src
- SmallVector<QualType, 16> SrcTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
- // ^^ Flattened accesses to SrcVal we want to load from
- CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes);
+ SmallVector<LValue, 16> LoadList;
+ CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList);
- assert(StoreGEPList.size() <= LoadGEPList.size() &&
- "Cannot perform HLSL flat cast when flattened source object \
+ assert(StoreList.size() <= LoadList.size() &&
+ "Cannot perform HLSL elementwise cast when flattened source object \
has fewer elements than flattened destination object.");
- // apply casts to what we load from LoadGEPList
+ // apply casts to what we load from LoadList
// and store result in Dest
- for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) {
- llvm::Value *Idx = LoadGEPList[I].second;
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
- Load =
- Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
- llvm::Value *Cast =
- CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc);
-
- // store back
- Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+ for (unsigned I = 0, E = StoreList.size(); I < E; I++) {
+ LValue DestLVal = StoreList[I];
+ LValue SrcLVal = LoadList[I];
+ RValue RVal = CGF.EmitLoadOfLValue(SrcLVal, Loc);
+ assert(RVal.isScalar() && "All flattened source values should be scalars");
+ llvm::Value *Val = RVal.getScalarVal();
+ llvm::Value *Cast = CGF.EmitScalarConversion(Val, SrcLVal.getType(),
+ DestLVal.getType(), Loc);
+ CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal);
}
}
@@ -988,31 +950,33 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
Expr *Src = E->getSubExpr();
QualType SrcTy = Src->getType();
RValue RV = CGF.EmitAnyExpr(Src);
- QualType DestTy = E->getType();
- Address DestVal = Dest.getAddress();
+ LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
SourceLocation Loc = E->getExprLoc();
- assert(RV.isScalar() && "RHS of HLSL splat cast must be a scalar.");
+ assert(RV.isScalar() && SrcTy->isScalarType() &&
+ "RHS of HLSL splat cast must be a scalar.");
llvm::Value *SrcVal = RV.getScalarVal();
- EmitHLSLAggregateSplatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc);
break;
}
case CK_HLSLElementwiseCast: {
Expr *Src = E->getSubExpr();
QualType SrcTy = Src->getType();
RValue RV = CGF.EmitAnyExpr(Src);
- QualType DestTy = E->getType();
- Address DestVal = Dest.getAddress();
+ LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
SourceLocation Loc = E->getExprLoc();
if (RV.isScalar()) {
llvm::Value *SrcVal = RV.getScalarVal();
- EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ assert(SrcTy->isVectorType() &&
+ "HLSL Elementwise cast doesn't handle splatting.");
+ EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc);
} else {
assert(RV.isAggregate() &&
"Can't perform HLSL Aggregate cast on a complex type.");
Address SrcVal = RV.getAggregateAddress();
- EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ EmitHLSLElementwiseCast(CGF, DestLVal, CGF.MakeAddrLValue(SrcVal, SrcTy),
+ Loc);
}
break;
}
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index c52526c..290c2e0 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -1655,11 +1655,16 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
RValue RV =
EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs);
- // Set !heapallocsite metadata on the call to operator new.
- if (getDebugInfo())
- if (auto *newCall = dyn_cast<llvm::CallBase>(RV.getScalarVal()))
- getDebugInfo()->addHeapAllocSiteMetadata(newCall, allocType,
- E->getExprLoc());
+ if (auto *newCall = dyn_cast<llvm::CallBase>(RV.getScalarVal())) {
+ if (auto *CGDI = getDebugInfo()) {
+ // Set !heapallocsite metadata on the call to operator new.
+ CGDI->addHeapAllocSiteMetadata(newCall, allocType, E->getExprLoc());
+ }
+ if (SanOpts.has(SanitizerKind::AllocToken)) {
+ // Set !alloc_token metadata.
+ EmitAllocToken(newCall, allocType);
+ }
+ }
// If this was a call to a global replaceable allocation function that does
// not take an alignment argument, the allocator is known to produce
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index c961222..06d9d81 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2397,39 +2397,37 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) {
}
// RHS is an aggregate type
-static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal,
- QualType RHSTy, QualType LHSTy,
- SourceLocation Loc) {
- SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
- SmallVector<QualType, 16> SrcTypes; // Flattened type
- CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes);
- // LHS is either a vector or a builtin?
+static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
+ QualType DestTy, SourceLocation Loc) {
+ SmallVector<LValue, 16> LoadList;
+ CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList);
+ // Dest is either a vector or a builtin type.
// if it's a vector, create a temp alloca to store into and return that
- if (auto *VecTy = LHSTy->getAs<VectorType>()) {
- assert(SrcTypes.size() >= VecTy->getNumElements() &&
- "Flattened type on RHS must have more elements than vector on LHS.");
+ if (auto *VecTy = DestTy->getAs<VectorType>()) {
+ assert(LoadList.size() >= VecTy->getNumElements() &&
+ "Flattened type on RHS must have the same number or more elements "
+ "than vector on LHS.");
llvm::Value *V =
- CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp"));
+ CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
// write to V.
for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) {
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
- llvm::Value *Idx = LoadGEPList[I].second;
- Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract")
- : Load;
- llvm::Value *Cast = CGF.EmitScalarConversion(
- Load, SrcTypes[I], VecTy->getElementType(), Loc);
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
+ assert(RVal.isScalar() &&
+ "All flattened source values should be scalars.");
+ llvm::Value *Cast =
+ CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
+ VecTy->getElementType(), Loc);
V = CGF.Builder.CreateInsertElement(V, Cast, I);
}
return V;
}
- // i its a builtin just do an extract element or load.
- assert(LHSTy->isBuiltinType() &&
+ // if it's a builtin, just do an extract element or load.
+ assert(DestTy->isBuiltinType() &&
"Destination type must be a vector or builtin type.");
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load");
- llvm::Value *Idx = LoadGEPList[0].second;
- Load =
- Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
- return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc);
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc);
+ assert(RVal.isScalar() && "All flattened source values should be scalars.");
+ return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(),
+ DestTy, Loc);
}
// VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts
@@ -2954,12 +2952,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
case CK_HLSLElementwiseCast: {
RValue RV = CGF.EmitAnyExpr(E);
SourceLocation Loc = CE->getExprLoc();
- QualType SrcTy = E->getType();
assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast.");
// RHS is an aggregate
- Address SrcVal = RV.getAggregateAddress();
- return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc);
+ LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType());
+ return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc);
}
} // end of switch
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8cda583..fa94692 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6808,12 +6808,13 @@ public:
/// they were computed by collectAttachPtrExprInfo(), if they are semantically
/// different.
struct AttachPtrExprComparator {
- const MappableExprsHandler *Handler = nullptr;
+ const MappableExprsHandler &Handler;
// Cache of previous equality comparison results.
mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
CachedEqualityComparisons;
- AttachPtrExprComparator(const MappableExprsHandler *H) : Handler(H) {}
+ AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
+ AttachPtrExprComparator() = delete;
// Return true iff LHS is "less than" RHS.
bool operator()(const Expr *LHS, const Expr *RHS) const {
@@ -6821,15 +6822,15 @@ public:
return false;
// First, compare by complexity (depth)
- const auto ItLHS = Handler->AttachPtrComponentDepthMap.find(LHS);
- const auto ItRHS = Handler->AttachPtrComponentDepthMap.find(RHS);
+ const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
+ const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
std::optional<size_t> DepthLHS =
- (ItLHS != Handler->AttachPtrComponentDepthMap.end()) ? ItLHS->second
- : std::nullopt;
+ (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
+ : std::nullopt;
std::optional<size_t> DepthRHS =
- (ItRHS != Handler->AttachPtrComponentDepthMap.end()) ? ItRHS->second
- : std::nullopt;
+ (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
+ : std::nullopt;
// std::nullopt (no attach pointer) has lowest complexity
if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
@@ -6877,8 +6878,8 @@ public:
/// Returns true iff LHS was computed before RHS by
/// collectAttachPtrExprInfo().
bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
- const size_t &OrderLHS = Handler->AttachPtrComputationOrderMap.at(LHS);
- const size_t &OrderRHS = Handler->AttachPtrComputationOrderMap.at(RHS);
+ const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
+ const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
return OrderLHS < OrderRHS;
}
@@ -6897,7 +6898,7 @@ public:
if (!LHS || !RHS)
return false;
- ASTContext &Ctx = Handler->CGF.getContext();
+ ASTContext &Ctx = Handler.CGF.getContext();
// Strip away parentheses and no-op casts to get to the core expression
LHS = LHS->IgnoreParenNoopCasts(Ctx);
RHS = RHS->IgnoreParenNoopCasts(Ctx);
@@ -7246,6 +7247,10 @@ private:
llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
{nullptr, 0}};
+ /// An instance of the attach-ptr-expr comparator that can be used
+ /// throughout the lifetime of this handler.
+ AttachPtrExprComparator AttachPtrComparator;
+
llvm::Value *getExprTypeSize(const Expr *E) const {
QualType ExprTy = E->getType().getCanonicalType();
@@ -8963,7 +8968,7 @@ private:
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
- : CurDir(&Dir), CGF(CGF) {
+ : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
// Extract firstprivate clause information.
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
for (const auto *D : C->varlist())
@@ -9009,7 +9014,7 @@ public:
/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
- : CurDir(&Dir), CGF(CGF) {}
+ : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index 810d6aa..3a7ee54 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -163,12 +163,14 @@ public:
SourceLocation Loc) override;
// Currently unsupported on the device.
+ using CGOpenMPRuntime::emitMessageClause;
llvm::Value *emitMessageClause(CodeGenFunction &CGF, const Expr *Message,
SourceLocation Loc) override;
// Currently unsupported on the device.
- virtual llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity,
- SourceLocation Loc) override;
+ using CGOpenMPRuntime::emitSeverityClause;
+ llvm::Value *emitSeverityClause(OpenMPSeverityClauseKind Severity,
+ SourceLocation Loc) override;
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index b2fe917..acf8de4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -846,6 +846,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Fn->addFnAttr(llvm::Attribute::SanitizeNumericalStability);
if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory))
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+ if (SanOpts.has(SanitizerKind::AllocToken))
+ Fn->addFnAttr(llvm::Attribute::SanitizeAllocToken);
}
if (SanOpts.has(SanitizerKind::SafeStack))
Fn->addFnAttr(llvm::Attribute::SafeStack);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index f0565c1..e14e60c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3348,6 +3348,9 @@ public:
SanitizerAnnotateDebugInfo(ArrayRef<SanitizerKind::SanitizerOrdinal> Ordinals,
SanitizerHandler Handler);
+ /// Emit additional metadata used by the AllocToken instrumentation.
+ void EmitAllocToken(llvm::CallBase *CB, QualType AllocType);
+
llvm::Value *GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FD,
const FieldDecl *CountDecl);
@@ -4464,10 +4467,8 @@ public:
AggValueSlot slot = AggValueSlot::ignored());
LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e);
- void FlattenAccessAndType(
- Address Addr, QualType AddrTy,
- SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList,
- SmallVectorImpl<QualType> &FlatTypes);
+ void FlattenAccessAndTypeLValue(LValue LVal,
+ SmallVectorImpl<LValue> &AccessList);
llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f6f7f22..8d019d4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -493,10 +493,15 @@ CodeGenModule::CodeGenModule(ASTContext &C,
auto ReaderOrErr = llvm::IndexedInstrProfReader::create(
CodeGenOpts.ProfileInstrumentUsePath, *FS,
CodeGenOpts.ProfileRemappingFile);
- // We're checking for profile read errors in CompilerInvocation, so if
- // there was an error it should've already been caught. If it hasn't been
- // somehow, trip an assertion.
- assert(ReaderOrErr);
+ if (auto E = ReaderOrErr.takeError()) {
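+ // Report a profile read failure as a proper diagnostic instead of
+ // asserting.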
+ unsigned DiagID = Diags.getCustomDiagID(
+ DiagnosticsEngine::Error, "Error in reading profile %0: %1");
+ llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
+ Diags.Report(DiagID)
+ << CodeGenOpts.ProfileInstrumentUsePath << EI.message();
+ });
+ return;
+ }
PGOReader = std::move(ReaderOrErr.get());
}
diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp
index e19daa9..72a42a6 100644
--- a/clang/lib/Driver/Action.cpp
+++ b/clang/lib/Driver/Action.cpp
@@ -43,7 +43,7 @@ const char *Action::getClassName(ActionClass AC) {
case OffloadUnbundlingJobClass:
return "clang-offload-unbundler";
case OffloadPackagerJobClass:
- return "clang-offload-packager";
+ return "llvm-offload-binary";
case LinkerWrapperJobClass:
return "clang-linker-wrapper";
case StaticLibJobClass:
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 98f5efb..eb5d542 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -57,6 +57,9 @@ std::string aarch64::getAArch64TargetCPU(const ArgList &Args,
// iOS 26 only runs on apple-a12 and later CPUs.
if (!Triple.isOSVersionLT(26))
return "apple-a12";
+ // arm64 (non-e) iOS 18 only runs on apple-a10 and later CPUs.
+ if (!Triple.isOSVersionLT(18) && !Triple.isArm64e())
+ return "apple-a10";
}
if (Triple.isWatchOS()) {
@@ -64,8 +67,8 @@ std::string aarch64::getAArch64TargetCPU(const ArgList &Args,
// arm64_32/arm64e watchOS requires S4 before watchOS 26, S6 after.
if (Triple.getArch() == llvm::Triple::aarch64_32 || Triple.isArm64e())
return Triple.isOSVersionLT(26) ? "apple-s4" : "apple-s6";
- // arm64 (non-e, non-32) watchOS comes later, and requires S6 anyway.
- return "apple-s6";
+ // arm64 (non-e, non-32) watchOS comes later, and requires S9 anyway.
+ return "apple-s9";
}
if (Triple.isXROS()) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 684cc09..107b9ff 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -41,6 +41,7 @@
#include "llvm/Frontend/Debug/Options.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
@@ -485,19 +486,47 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
}
if (ProfileUseArg) {
+ SmallString<128> UsePathBuf;
+ StringRef UsePath;
if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ))
- CmdArgs.push_back(Args.MakeArgString(
- Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
+ UsePath = ProfileUseArg->getValue();
else if ((ProfileUseArg->getOption().matches(
options::OPT_fprofile_use_EQ) ||
ProfileUseArg->getOption().matches(
options::OPT_fprofile_instr_use))) {
- SmallString<128> Path(
- ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
- if (Path.empty() || llvm::sys::fs::is_directory(Path))
- llvm::sys::path::append(Path, "default.profdata");
+ UsePathBuf =
+ ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue();
+ if (UsePathBuf.empty() || llvm::sys::fs::is_directory(UsePathBuf))
+ llvm::sys::path::append(UsePathBuf, "default.profdata");
+ UsePath = UsePathBuf;
+ }
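+ // Open the profile here in the driver to determine which instrumentation
+ // kind it contains.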
+ auto ReaderOrErr =
+ llvm::IndexedInstrProfReader::create(UsePath, D.getVFS());
+ if (auto E = ReaderOrErr.takeError()) {
+ auto DiagID = D.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error, "Error in reading profile %0: %1");
+ llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
+ D.Diag(DiagID) << UsePath.str() << EI.message();
+ });
+ } else {
+ std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
+ std::move(ReaderOrErr.get());
+ StringRef UseKind;
+ // Currently memprof profiles are only added at the IR level. Mark the
+ // profile type as IR in that case as well; the subsequent matching
+ // needs to detect which is available (might be one or both).
+ if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) {
+ if (PGOReader->hasCSIRLevelProfile())
+ UseKind = "csllvm";
+ else
+ UseKind = "llvm";
+ } else
+ UseKind = "clang";
+
+ CmdArgs.push_back(
+ Args.MakeArgString("-fprofile-instrument-use=" + UseKind));
CmdArgs.push_back(
- Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path));
+ Args.MakeArgString("-fprofile-instrument-use-path=" + UsePath));
}
}
diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h
index c227895..9adad5c 100644
--- a/clang/lib/Driver/ToolChains/Clang.h
+++ b/clang/lib/Driver/ToolChains/Clang.h
@@ -163,7 +163,7 @@ public:
class LLVM_LIBRARY_VISIBILITY OffloadPackager final : public Tool {
public:
OffloadPackager(const ToolChain &TC)
- : Tool("Offload::Packager", "clang-offload-packager", TC) {}
+ : Tool("Offload::Packager", "llvm-offload-binary", TC) {}
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 4223752..50fd50a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -533,9 +533,9 @@ static T extractMaskValue(T KeyPath) {
#define PARSE_OPTION_WITH_MARSHALLING( \
ARGS, DIAGS, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, \
ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \
- METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \
- IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \
- TABLE_INDEX) \
+ METAVAR, VALUES, SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \
+ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \
+ MERGER, EXTRACTOR, TABLE_INDEX) \
if ((VISIBILITY) & options::CC1Option) { \
KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \
if (IMPLIED_CHECK) \
@@ -551,8 +551,9 @@ static T extractMaskValue(T KeyPath) {
#define GENERATE_OPTION_WITH_MARSHALLING( \
CONSUMER, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \
FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \
- SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \
- IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \
+ SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \
+ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \
+ TABLE_INDEX) \
if ((VISIBILITY) & options::CC1Option) { \
[&](const auto &Extracted) { \
if (ALWAYS_EMIT || \
@@ -1473,34 +1474,6 @@ static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) {
return Buffer;
}
-// Set the profile kind using fprofile-instrument-use-path.
-static void setPGOUseInstrumentor(CodeGenOptions &Opts,
- const Twine &ProfileName,
- llvm::vfs::FileSystem &FS,
- DiagnosticsEngine &Diags) {
- auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName, FS);
- if (auto E = ReaderOrErr.takeError()) {
- unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
- "Error in reading profile %0: %1");
- llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
- Diags.Report(DiagID) << ProfileName.str() << EI.message();
- });
- return;
- }
- std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
- std::move(ReaderOrErr.get());
- // Currently memprof profiles are only added at the IR level. Mark the profile
- // type as IR in that case as well and the subsequent matching needs to detect
- // which is available (might be one or both).
- if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) {
- if (PGOReader->hasCSIRLevelProfile())
- Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileCSIRInstr);
- else
- Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileIRInstr);
- } else
- Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileClangInstr);
-}
-
void CompilerInvocation::setDefaultPointerAuthOptions(
PointerAuthOptions &Opts, const LangOptions &LangOpts,
const llvm::Triple &Triple) {
@@ -5090,16 +5063,10 @@ bool CompilerInvocation::CreateFromArgsImpl(
append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs);
}
- // Set PGOOptions. Need to create a temporary VFS to read the profile
- // to determine the PGO type.
- if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty()) {
- auto FS =
- createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles,
- Diags, llvm::vfs::getRealFileSystem());
- setPGOUseInstrumentor(Res.getCodeGenOpts(),
- Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS,
- Diags);
- }
+ if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty() &&
+ Res.getCodeGenOpts().getProfileUse() ==
+ llvm::driver::ProfileInstrKind::ProfileNone)
+ Diags.Report(diag::err_drv_profile_instrument_use_path_with_no_kind);
FixupInvocation(Res, Diags, Args, DashX);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 4bd7981..142cc07 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -41,7 +41,8 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
-static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtsh_h(__m512h __a) {
return __a[0];
}
@@ -111,7 +112,7 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ph(
e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
}
-static __inline __m512h __DEFAULT_FN_ATTRS512
+static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_set1_pch(_Float16 _Complex __h) {
return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h));
}
@@ -192,17 +193,17 @@ _mm512_castsi512_ph(__m512i __a) {
return (__m512h)__a;
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_castph256_ph128(__m256h __a) {
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS512
+static __inline__ __m128h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_castph512_ph128(__m512h __a) {
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
}
-static __inline__ __m256h __DEFAULT_FN_ATTRS512
+static __inline__ __m256h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_castph512_ph256(__m512h __a) {
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15);
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index c0bcc08..5b2b3f0 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -34,11 +34,13 @@
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
-static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtsh_h(__m128h __a) {
return __a[0];
}
-static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtsh_h(__m256h __a) {
return __a[0];
}
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index 6206a34..414f10a 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -82,6 +82,8 @@
#define __opencl_c_read_write_images 1
#endif // defined(__SPIR__)
+#endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
+
// Undefine any feature macros that have been explicitly disabled using
// an __undef_<feature> macro.
#ifdef __undef___opencl_c_work_group_collective_functions
@@ -99,8 +101,12 @@
#ifdef __undef___opencl_c_read_write_images
#undef __opencl_c_read_write_images
#endif
-
-#endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
+#ifdef __undef___opencl_c_integer_dot_product_input_4x8bit
+#undef __opencl_c_integer_dot_product_input_4x8bit
+#endif
+#ifdef __undef___opencl_c_integer_dot_product_input_4x8bit_packed
+#undef __opencl_c_integer_dot_product_input_4x8bit_packed
+#endif
#if !defined(__opencl_c_generic_address_space)
// Internal feature macro to provide named (global, local, private) address
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3cc61b1..063db05 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -8811,8 +8811,10 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
case ArgType::Match:
case ArgType::MatchPromotion:
case ArgType::NoMatchPromotionTypeConfusion:
- case ArgType::NoMatchSignedness:
llvm_unreachable("expected non-matching");
+ case ArgType::NoMatchSignedness:
+ Diag = diag::warn_format_conversion_argument_type_mismatch_signedness;
+ break;
case ArgType::NoMatchPedantic:
Diag = diag::warn_format_conversion_argument_type_mismatch_pedantic;
break;
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 999e302c..f4df63c 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -280,6 +280,11 @@ public:
if (T->getDepth() >= TemplateArgs.getNumLevels())
return true;
+ // There might not be a corresponding template argument before substituting
+ // into the parameter mapping, e.g. a sizeof... expression.
+ if (!TemplateArgs.hasTemplateArgument(T->getDepth(), T->getIndex()))
+ return true;
+
TemplateArgument Arg = TemplateArgs(T->getDepth(), T->getIndex());
if (T->isParameterPack() && SemaRef.ArgPackSubstIndex) {
@@ -300,6 +305,12 @@ public:
if (!NTTP)
return TraverseDecl(D);
+ if (NTTP->getDepth() >= TemplateArgs.getNumLevels())
+ return true;
+
+ if (!TemplateArgs.hasTemplateArgument(NTTP->getDepth(), NTTP->getIndex()))
+ return true;
+
TemplateArgument Arg = TemplateArgs(NTTP->getDepth(), NTTP->getPosition());
if (NTTP->isParameterPack() && SemaRef.ArgPackSubstIndex) {
assert(Arg.getKind() == TemplateArgument::Pack &&
@@ -326,17 +337,25 @@ public:
return inherited::TraverseDecl(D);
}
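+ // Traverse the callee and each argument explicitly rather than relying on
+ // the default CallExpr traversal.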
+ bool TraverseCallExpr(CallExpr *CE) {
+ inherited::TraverseStmt(CE->getCallee());
+
+ for (Expr *Arg : CE->arguments())
+ inherited::TraverseStmt(Arg);
+
+ return true;
+ }
+
bool TraverseTypeLoc(TypeLoc TL, bool TraverseQualifier = true) {
// We don't care about TypeLocs. So traverse Types instead.
- return TraverseType(TL.getType(), TraverseQualifier);
+ return TraverseType(TL.getType().getCanonicalType(), TraverseQualifier);
}
bool TraverseTagType(const TagType *T, bool TraverseQualifier) {
// T's parent can be dependent while T doesn't have any template arguments.
// We should have already traversed its qualifier.
// FIXME: Add an assert to catch cases where we failed to profile the
- // concept. assert(!T->isDependentType() && "We missed a case in profiling
- // concepts!");
+ // concept.
return true;
}
@@ -701,7 +720,6 @@ ExprResult ConstraintSatisfactionChecker::Evaluate(
if (auto Iter = S.UnsubstitutedConstraintSatisfactionCache.find(ID);
Iter != S.UnsubstitutedConstraintSatisfactionCache.end()) {
-
auto &Cached = Iter->second.Satisfaction;
Satisfaction.ContainsErrors = Cached.ContainsErrors;
Satisfaction.IsSatisfied = Cached.IsSatisfied;
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index d27f767..215431c 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -9546,14 +9546,32 @@ bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
CXXMethodDecl *Decl = SMOR.getMethod();
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
- int DiagKind = -1;
-
- if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::NoMemberOrDeleted)
- DiagKind = !Decl ? 0 : 1;
- else if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::Ambiguous)
- DiagKind = 2;
+ enum {
+ NotSet = -1,
+ NoDecl,
+ DeletedDecl,
+ MultipleDecl,
+ InaccessibleDecl,
+ NonTrivialDecl
+ } DiagKind = NotSet;
+
+ if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::NoMemberOrDeleted) {
+ if (CSM == CXXSpecialMemberKind::DefaultConstructor && Field &&
+ Field->getParent()->isUnion()) {
+ // [class.default.ctor]p2:
+ // A defaulted default constructor for class X is defined as deleted if
+ // - X is a union that has a variant member with a non-trivial default
+ // constructor and no variant member of X has a default member
+ // initializer
+ const auto *RD = cast<CXXRecordDecl>(Field->getParent());
+ if (RD->hasInClassInitializer())
+ return false;
+ }
+ DiagKind = !Decl ? NoDecl : DeletedDecl;
+ } else if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::Ambiguous)
+ DiagKind = MultipleDecl;
else if (!isAccessible(Subobj, Decl))
- DiagKind = 3;
+ DiagKind = InaccessibleDecl;
else if (!IsDtorCallInCtor && Field && Field->getParent()->isUnion() &&
!Decl->isTrivial()) {
// A member of a union must have a trivial corresponding special member.
@@ -9569,13 +9587,13 @@ bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
// initializer
const auto *RD = cast<CXXRecordDecl>(Field->getParent());
if (!RD->hasInClassInitializer())
- DiagKind = 4;
+ DiagKind = NonTrivialDecl;
} else {
- DiagKind = 4;
+ DiagKind = NonTrivialDecl;
}
}
- if (DiagKind == -1)
+ if (DiagKind == NotSet)
return false;
if (Diagnose) {
@@ -9593,9 +9611,9 @@ bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
<< /*IsObjCPtr*/ false;
}
- if (DiagKind == 1)
+ if (DiagKind == DeletedDecl)
S.NoteDeletedFunction(Decl);
- // FIXME: Explain inaccessibility if DiagKind == 3.
+ // FIXME: Explain inaccessibility if DiagKind == InaccessibleDecl.
}
return true;
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index fa30c66b..09e5d69 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -598,18 +598,17 @@ void SemaHLSL::ActOnFinishBuffer(Decl *Dcl, SourceLocation RBrace) {
validatePackoffset(SemaRef, BufDecl);
- // create buffer layout struct
createHostLayoutStructForBuffer(SemaRef, BufDecl);
- HLSLVkBindingAttr *VkBinding = Dcl->getAttr<HLSLVkBindingAttr>();
- HLSLResourceBindingAttr *RBA = Dcl->getAttr<HLSLResourceBindingAttr>();
- if (!VkBinding && (!RBA || !RBA->hasRegisterSlot())) {
+ // Handle implicit binding if needed.
+ ResourceBindingAttrs ResourceAttrs(Dcl);
+ if (!ResourceAttrs.isExplicit()) {
SemaRef.Diag(Dcl->getLocation(), diag::warn_hlsl_implicit_binding);
// Use HLSLResourceBindingAttr to transfer implicit binding order_ID
// to codegen. If it does not exist, create an implicit attribute.
uint32_t OrderID = getNextImplicitBindingOrderID();
- if (RBA)
- RBA->setImplicitBindingOrderID(OrderID);
+ if (ResourceAttrs.hasBinding())
+ ResourceAttrs.setImplicitOrderID(OrderID);
else
addImplicitBindingAttrToDecl(SemaRef, BufDecl,
BufDecl->isCBuffer() ? RegisterType::CBuffer
@@ -1289,8 +1288,8 @@ bool SemaHLSL::handleRootSignatureElements(
VerifyRegister(Loc, Descriptor->Reg.Number);
VerifySpace(Loc, Descriptor->Space);
- if (!llvm::hlsl::rootsig::verifyRootDescriptorFlag(
- Version, llvm::to_underlying(Descriptor->Flags)))
+ if (!llvm::hlsl::rootsig::verifyRootDescriptorFlag(Version,
+ Descriptor->Flags))
ReportFlagError(Loc);
} else if (const auto *Constants =
std::get_if<llvm::hlsl::rootsig::RootConstants>(&Elem)) {
@@ -1590,10 +1589,6 @@ void SemaHLSL::handleVkConstantIdAttr(Decl *D, const ParsedAttr &AL) {
}
void SemaHLSL::handleVkBindingAttr(Decl *D, const ParsedAttr &AL) {
- // The vk::binding attribute only applies to SPIR-V.
- if (!getASTContext().getTargetInfo().getTriple().isSPIRV())
- return;
-
uint32_t Binding = 0;
if (!SemaRef.checkUInt32Argument(AL, AL.getArgAsExpr(0), Binding))
return;
@@ -3571,9 +3566,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) {
if (SrcVecTy)
SrcTy = SrcVecTy->getElementType();
- if (ContainsBitField(DestTy))
- return false;
-
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
@@ -3600,9 +3592,6 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
(DestTy->isScalarType() || DestTy->isVectorType()))
return false;
- if (ContainsBitField(DestTy) || ContainsBitField(SrcTy))
- return false;
-
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
llvm::SmallVector<QualType> SrcTypes;
@@ -3786,17 +3775,15 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) {
// If the resource array does not have an explicit binding attribute,
// create an implicit one. It will be used to transfer implicit binding
// order_ID to codegen.
- if (!VD->hasAttr<HLSLVkBindingAttr>()) {
- HLSLResourceBindingAttr *RBA = VD->getAttr<HLSLResourceBindingAttr>();
- if (!RBA || !RBA->hasRegisterSlot()) {
- uint32_t OrderID = getNextImplicitBindingOrderID();
- if (RBA)
- RBA->setImplicitBindingOrderID(OrderID);
- else
- addImplicitBindingAttrToDecl(
- SemaRef, VD, getRegisterType(getResourceArrayHandleType(VD)),
- OrderID);
- }
+ ResourceBindingAttrs Binding(VD);
+ if (!Binding.isExplicit()) {
+ uint32_t OrderID = getNextImplicitBindingOrderID();
+ if (Binding.hasBinding())
+ Binding.setImplicitOrderID(OrderID);
+ else
+ addImplicitBindingAttrToDecl(
+ SemaRef, VD, getRegisterType(getResourceArrayHandleType(VD)),
+ OrderID);
}
}
}
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 922fcac..543db46 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -3920,6 +3920,7 @@ bool InitializationSequence::isAmbiguous() const {
case FK_AddressOfUnaddressableFunction:
case FK_ParenthesizedListInitFailed:
case FK_DesignatedInitForNonAggregate:
+ case FK_HLSLInitListFlatteningFailed:
return false;
case FK_ReferenceInitOverloadFailed:
@@ -4882,8 +4883,10 @@ static void TryListInitialization(Sema &S,
bool TreatUnavailableAsInvalid) {
QualType DestType = Entity.getType();
- if (S.getLangOpts().HLSL && !S.HLSL().transformInitList(Entity, InitList))
+ if (S.getLangOpts().HLSL && !S.HLSL().transformInitList(Entity, InitList)) {
+ Sequence.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
return;
+ }
// C++ doesn't allow scalar initialization with more than one argument.
// But C99 complex numbers are scalars and it makes sense there.
@@ -6817,33 +6820,18 @@ void InitializationSequence::InitializeFrom(Sema &S,
assert(Args.size() >= 1 && "Zero-argument case handled above");
// For HLSL ext vector types we allow list initialization behavior for C++
- // constructor syntax. This is accomplished by converting initialization
- // arguments an InitListExpr late.
+ // functional cast expressions which look like constructor syntax. This is
+ // accomplished by converting initialization arguments to InitListExpr.
if (S.getLangOpts().HLSL && Args.size() > 1 && DestType->isExtVectorType() &&
(SourceType.isNull() ||
!Context.hasSameUnqualifiedType(SourceType, DestType))) {
-
- llvm::SmallVector<Expr *> InitArgs;
- for (auto *Arg : Args) {
- if (Arg->getType()->isExtVectorType()) {
- const auto *VTy = Arg->getType()->castAs<ExtVectorType>();
- unsigned Elm = VTy->getNumElements();
- for (unsigned Idx = 0; Idx < Elm; ++Idx) {
- InitArgs.emplace_back(new (Context) ArraySubscriptExpr(
- Arg,
- IntegerLiteral::Create(
- Context, llvm::APInt(Context.getIntWidth(Context.IntTy), Idx),
- Context.IntTy, SourceLocation()),
- VTy->getElementType(), Arg->getValueKind(), Arg->getObjectKind(),
- SourceLocation()));
- }
- } else
- InitArgs.emplace_back(Arg);
- }
- InitListExpr *ILE = new (Context) InitListExpr(
- S.getASTContext(), SourceLocation(), InitArgs, SourceLocation());
+ InitListExpr *ILE = new (Context)
+ InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args,
+ Args.back()->getEndLoc());
+ ILE->setType(DestType);
Args[0] = ILE;
- AddListInitializationStep(DestType);
+ TryListInitialization(S, Entity, Kind, ILE, *this,
+ TreatUnavailableAsInvalid);
return;
}
@@ -9301,6 +9289,14 @@ bool InitializationSequence::Diagnose(Sema &S,
break;
}
+ case InitializationSequence::FK_HLSLInitListFlatteningFailed: {
+ // Unlike C/C++ list initialization, there is no fallback if it fails. This
+ // allows us to diagnose the failure when it happens in the
+ // TryListInitialization call instead of delaying the diagnosis, which is
+ // beneficial because the flattening is also expensive.
+ break;
+ }
+
case FK_ExplicitConstructor: {
S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
<< Args[0]->getSourceRange();
@@ -9499,6 +9495,10 @@ void InitializationSequence::dump(raw_ostream &OS) const {
case FK_DesignatedInitForNonAggregate:
OS << "designated initializer for non-aggregate type";
break;
+
+ case FK_HLSLInitListFlatteningFailed:
+ OS << "HLSL initialization list flattening failed";
+ break;
}
OS << '\n';
return;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 5657dfe..8d32ef6 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1087,14 +1087,14 @@ static bool shouldAddReversedEqEq(Sema &S, SourceLocation OpLoc,
}
bool OverloadCandidateSet::OperatorRewriteInfo::allowsReversed(
- OverloadedOperatorKind Op) {
+ OverloadedOperatorKind Op) const {
if (!AllowRewrittenCandidates)
return false;
return Op == OO_EqualEqual || Op == OO_Spaceship;
}
bool OverloadCandidateSet::OperatorRewriteInfo::shouldAddReversed(
- Sema &S, ArrayRef<Expr *> OriginalArgs, FunctionDecl *FD) {
+ Sema &S, ArrayRef<Expr *> OriginalArgs, FunctionDecl *FD) const {
auto Op = FD->getOverloadedOperator();
if (!allowsReversed(Op))
return false;
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 83d79b43..70baab5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -3812,6 +3812,15 @@ bool MallocChecker::mayFreeAnyEscapedMemoryOrIsModeledExplicitly(
return true;
}
+ // Protobuf function declared in `generated_message_util.h` that takes
+ // ownership of the second argument. As the first and third arguments are
+ // allocation arenas and won't be tracked by this checker, there is no reason
+ // to set `EscapingSymbol`. (Also, this is an implementation detail of
+ // Protobuf, so it's better to be a bit more permissive.)
+ if (FName == "GetOwnedMessageInternal") {
+ return true;
+ }
+
// Handle cases where we know a buffer's /address/ can escape.
// Note that the above checks handle some special cases where we know that
// even though the address escapes, it's still our responsibility to free the
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index 00a1b8b..66cfccb 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -31,9 +31,9 @@ bool tryToFindPtrOrigin(
if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
if (auto *VD = dyn_cast_or_null<VarDecl>(DRE->getDecl())) {
auto QT = VD->getType();
- if (VD->hasGlobalStorage() && QT.isConstQualified()) {
+ auto IsImmortal = safeGetName(VD) == "NSApp";
+ if (VD->hasGlobalStorage() && (IsImmortal || QT.isConstQualified()))
return callback(E, true);
- }
}
}
if (auto *tempExpr = dyn_cast<MaterializeTemporaryExpr>(E)) {
@@ -208,6 +208,8 @@ bool tryToFindPtrOrigin(
continue;
}
if (auto *BoxedExpr = dyn_cast<ObjCBoxedExpr>(E)) {
+ if (StopAtFirstRefCountedObj)
+ return callback(BoxedExpr, true);
E = BoxedExpr->getSubExpr();
continue;
}
diff --git a/clang/lib/Testing/CommandLineArgs.cpp b/clang/lib/Testing/CommandLineArgs.cpp
index e9da72f..95f37e3 100644
--- a/clang/lib/Testing/CommandLineArgs.cpp
+++ b/clang/lib/Testing/CommandLineArgs.cpp
@@ -103,7 +103,8 @@ std::string getAnyTargetForTesting() {
StringRef TargetName(Target.getName());
if (TargetName == "x86-64")
TargetName = "x86_64";
- if (llvm::TargetRegistry::lookupTarget(TargetName, Error) == &Target) {
+ if (llvm::TargetRegistry::lookupTarget(llvm::Triple(TargetName), Error) ==
+ &Target) {
return std::string(TargetName);
}
}
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index e1f4d0d..b0096d8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -509,6 +509,8 @@ bool initializeScanCompilerInstance(
ScanInstance.getFrontendOpts().DisableFree = false;
ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
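+ // The scanner invocation itself should not emit a reduced BMI or a module
+ // output file.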
+ ScanInstance.getFrontendOpts().GenReducedBMI = false;
+ ScanInstance.getFrontendOpts().ModuleOutputPath.clear();
// This will prevent us compiling individual modules asynchronously since
// FileManager is not thread-safe, but it does improve performance for now.
ScanInstance.getFrontendOpts().ModulesShareFileManager = true;
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index d67178c..a117bec 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -263,6 +263,10 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
// units.
CI.getFrontendOpts().Inputs.clear();
CI.getFrontendOpts().OutputFile.clear();
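+ // Drop reduced-BMI output, and re-enable serialization of header search
+ // paths and diagnostic options for the module build.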
+ CI.getFrontendOpts().GenReducedBMI = false;
+ CI.getFrontendOpts().ModuleOutputPath.clear();
+ CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = false;
+ CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = false;
// LLVM options are not going to affect the AST
CI.getFrontendOpts().LLVMArgs.clear();