Diffstat (limited to 'clang')
-rw-r--r-- clang/docs/ReleaseNotes.rst | 2
-rw-r--r-- clang/include/clang/AST/OpenACCClause.h | 32
-rw-r--r-- clang/include/clang/Basic/DiagnosticSemaKinds.td | 5
-rw-r--r-- clang/include/clang/Sema/SemaOpenACC.h | 6
-rw-r--r-- clang/lib/AST/ByteCode/Compiler.cpp | 27
-rw-r--r-- clang/lib/AST/ByteCode/Descriptor.cpp | 90
-rw-r--r-- clang/lib/AST/ByteCode/Descriptor.h | 9
-rw-r--r-- clang/lib/AST/ByteCode/InterpFrame.cpp | 5
-rw-r--r-- clang/lib/AST/ExprConstant.cpp | 14
-rw-r--r-- clang/lib/AST/OpenACCClause.cpp | 7
-rw-r--r-- clang/lib/AST/StmtProfile.cpp | 6
-rw-r--r-- clang/lib/Analysis/UnsafeBufferUsage.cpp | 8
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 157
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 23
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenFunction.cpp | 5
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenFunction.h | 8
-rw-r--r-- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 5
-rw-r--r-- clang/lib/CodeGen/CGCXXABI.h | 20
-rw-r--r-- clang/lib/CodeGen/CGExpr.cpp | 59
-rw-r--r-- clang/lib/CodeGen/CGExprCXX.cpp | 15
-rw-r--r-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 87
-rw-r--r-- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 6
-rw-r--r-- clang/lib/Driver/Driver.cpp | 1
-rw-r--r-- clang/lib/Driver/ToolChain.cpp | 29
-rw-r--r-- clang/lib/Frontend/CompilerInstance.cpp | 8
-rw-r--r-- clang/lib/Headers/avx2intrin.h | 18
-rw-r--r-- clang/lib/Headers/avx512dqintrin.h | 26
-rw-r--r-- clang/lib/Headers/avx512fintrin.h | 30
-rw-r--r-- clang/lib/Headers/avxintrin.h | 46
-rw-r--r-- clang/lib/Headers/emmintrin.h | 26
-rw-r--r-- clang/lib/Headers/mmintrin.h | 46
-rw-r--r-- clang/lib/Sema/SemaOpenACC.cpp | 20
-rw-r--r-- clang/lib/Sema/SemaOpenACCClause.cpp | 4
-rw-r--r-- clang/lib/Sema/TreeTransform.h | 11
-rw-r--r-- clang/lib/Serialization/ASTReader.cpp | 9
-rw-r--r-- clang/lib/Serialization/ASTWriter.cpp | 11
-rw-r--r-- clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 40
-rw-r--r-- clang/test/AST/ByteCode/cxx11.cpp | 12
-rw-r--r-- clang/test/AST/ByteCode/functions.cpp | 2
-rw-r--r-- clang/test/CIR/CodeGen/complex-arithmetic.cpp | 201
-rw-r--r-- clang/test/CIR/CodeGen/complex-compound-assignment.cpp | 288
-rw-r--r-- clang/test/CIR/CodeGen/variable-template-specialization.cpp | 40
-rw-r--r-- clang/test/CodeGen/PowerPC/check-zero-vector.c | 143
-rw-r--r-- clang/test/CodeGen/X86/avx-builtins.c | 23
-rw-r--r-- clang/test/CodeGen/X86/avx2-builtins.c | 5
-rw-r--r-- clang/test/CodeGen/X86/avx512dq-builtins.c | 9
-rw-r--r-- clang/test/CodeGen/X86/avx512f-builtins.c | 12
-rw-r--r-- clang/test/CodeGen/X86/builtin_test_helpers.h | 16
-rw-r--r-- clang/test/CodeGen/X86/mmx-builtins.c | 25
-rw-r--r-- clang/test/CodeGen/X86/sse2-builtins.c | 4
-rw-r--r-- clang/test/CodeGen/cfi-icall-generalize.c | 2
-rw-r--r-- clang/test/CodeGenCXX/dynamic-cast-exact-disabled.cpp | 16
-rw-r--r-- clang/test/CodeGenCXX/dynamic-cast-exact.cpp | 42
-rw-r--r-- clang/test/CodeGenCXX/mangle-class-nttp.cpp | 14
-rw-r--r-- clang/test/Driver/hip-options.hip | 2
-rw-r--r-- clang/test/SemaCXX/constexpr-value-init.cpp | 1
-rw-r--r-- clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp | 71
-rw-r--r-- clang/test/SemaOpenACC/private_firstprivate_reduction_required_ops.cpp | 19
-rw-r--r-- clang/test/SemaOpenACC/sub-array.cpp | 8
-rw-r--r-- clang/test/SemaOpenCLCXX/amdgpu-nullptr.clcpp | 9
-rw-r--r-- clang/tools/libclang/CIndex.cpp | 19
61 files changed, 1301 insertions(+), 603 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2a95d1e..0e9fcaa 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -182,6 +182,8 @@ Bug Fixes to C++ Support
- Fix a crash when deleting a pointer to an incomplete array (#GH150359).
- Fix an assertion failure when expression in assumption attribute
(``[[assume(expr)]]``) creates temporary objects.
+- Fix the dynamic_cast to final class optimization to correctly handle
+ casts that are guaranteed to fail (#GH137518).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
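
For reference, a minimal sketch of the guaranteed-to-fail case this note describes (hypothetical types, not taken from the patch): the destination class is final and the source class is reachable only through a non-public path, so the exact-cast optimization must fold the cast to null instead of crashing.

struct Base { virtual ~Base() = default; };
// 'Derived' is final and inherits 'Base' privately, so there is no public
// inheritance path and the cast below can never succeed at runtime.
struct Derived final : private Base {};

Derived *always_null(Base *b) {
  return dynamic_cast<Derived *>(b); // guaranteed to fail; now folds to null
}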
diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 8c848a1..b52f716 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -876,21 +876,32 @@ public:
}
};
+// A 'pair' that stands in for the recipe. RecipeDecl is the main declaration,
+// and InitFromTemporary is the 'temp' declaration inserted to be copied from.
+struct OpenACCFirstPrivateRecipe {
+ VarDecl *RecipeDecl, *InitFromTemporary;
+ OpenACCFirstPrivateRecipe(VarDecl *R, VarDecl *T)
+ : RecipeDecl(R), InitFromTemporary(T) {}
+ OpenACCFirstPrivateRecipe(std::pair<VarDecl *, VarDecl *> p)
+ : RecipeDecl(p.first), InitFromTemporary(p.second) {}
+};
+
class OpenACCFirstPrivateClause final
: public OpenACCClauseWithVarList,
private llvm::TrailingObjects<OpenACCFirstPrivateClause, Expr *,
- VarDecl *> {
+ OpenACCFirstPrivateRecipe> {
friend TrailingObjects;
OpenACCFirstPrivateClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
ArrayRef<Expr *> VarList,
- ArrayRef<VarDecl *> InitRecipes,
+ ArrayRef<OpenACCFirstPrivateRecipe> InitRecipes,
SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::FirstPrivate, BeginLoc,
LParenLoc, EndLoc) {
assert(VarList.size() == InitRecipes.size());
setExprs(getTrailingObjects<Expr *>(VarList.size()), VarList);
- llvm::uninitialized_copy(InitRecipes, getTrailingObjects<VarDecl *>());
+ llvm::uninitialized_copy(InitRecipes,
+ getTrailingObjects<OpenACCFirstPrivateRecipe>());
}
public:
@@ -900,19 +911,20 @@ public:
// Gets a list of 'made up' `VarDecl` objects that can be used by codegen to
// ensure that we properly initialize each of these variables.
- ArrayRef<VarDecl *> getInitRecipes() {
- return ArrayRef<VarDecl *>{getTrailingObjects<VarDecl *>(),
- getExprs().size()};
+ ArrayRef<OpenACCFirstPrivateRecipe> getInitRecipes() {
+ return ArrayRef<OpenACCFirstPrivateRecipe>{
+ getTrailingObjects<OpenACCFirstPrivateRecipe>(), getExprs().size()};
}
- ArrayRef<VarDecl *> getInitRecipes() const {
- return ArrayRef<VarDecl *>{getTrailingObjects<VarDecl *>(),
- getExprs().size()};
+ ArrayRef<OpenACCFirstPrivateRecipe> getInitRecipes() const {
+ return ArrayRef<OpenACCFirstPrivateRecipe>{
+ getTrailingObjects<OpenACCFirstPrivateRecipe>(), getExprs().size()};
}
static OpenACCFirstPrivateClause *
Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
- ArrayRef<Expr *> VarList, ArrayRef<VarDecl *> InitRecipes,
+ ArrayRef<Expr *> VarList,
+ ArrayRef<OpenACCFirstPrivateRecipe> InitRecipes,
SourceLocation EndLoc);
size_t numTrailingObjects(OverloadToken<Expr *>) const {
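
A sketch of how codegen might walk the new trailing recipe objects; only the accessor and member names come from the header above, the loop body is hypothetical.

static void walkRecipes(const OpenACCFirstPrivateClause &Clause) {
  for (const OpenACCFirstPrivateRecipe &R : Clause.getInitRecipes()) {
    VarDecl *Recipe = R.RecipeDecl;      // the 'made up' private declaration
    VarDecl *Temp = R.InitFromTemporary; // temporary the copy-init reads from
    (void)Recipe;
    (void)Temp;
  }
}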
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f903b7f..cf23594 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13531,6 +13531,11 @@ def err_acc_invalid_default_type
def err_acc_device_type_multiple_archs
: Error<"OpenACC 'device_type' clause on a 'set' construct only permits "
"one architecture">;
+def warn_acc_var_referenced_non_const_array
+ : Warning<"variable of array type %0 referenced in OpenACC '%1' clause "
+ "does not have constant bounds; initialization will happen after "
+ "decay to pointer">,
+ InGroup<DiagGroup<"openacc-var-non-const-array">>;
def warn_acc_var_referenced_lacks_op
: Warning<"variable of type %0 referenced in OpenACC '%1' clause does not "
"have a %enum_select<AccVarReferencedReason>{%DefCtor{default "
diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h
index 964749c..d078de5 100644
--- a/clang/include/clang/Sema/SemaOpenACC.h
+++ b/clang/include/clang/Sema/SemaOpenACC.h
@@ -240,7 +240,11 @@ public:
// Creates a VarDecl with a proper default init for the purposes of a
// `private`/'firstprivate'/'reduction' clause, so it can be used to generate
// a recipe later.
- VarDecl *CreateInitRecipe(OpenACCClauseKind CK, const Expr *VarExpr);
+  // The first entry is the recipe itself; the second is any required
+  // 'temporary' created for the init (in the case of a copy), such as with
+  // firstprivate.
+ std::pair<VarDecl *, VarDecl *> CreateInitRecipe(OpenACCClauseKind CK,
+ const Expr *VarExpr);
public:
ComputeConstructInfo &getActiveComputeConstructInfo() {
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 4ab4dee..cc99efa 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -5997,6 +5997,23 @@ bool Compiler<Emitter>::checkLiteralType(const Expr *E) {
return this->emitCheckLiteralType(E->getType().getTypePtr(), E);
}
+static bool initNeedsOverriddenLoc(const CXXCtorInitializer *Init) {
+ const Expr *InitExpr = Init->getInit();
+
+ if (!Init->isWritten() && !Init->isInClassMemberInitializer() &&
+ !isa<CXXConstructExpr>(InitExpr))
+ return true;
+
+ if (const auto *CE = dyn_cast<CXXConstructExpr>(InitExpr)) {
+ const CXXConstructorDecl *Ctor = CE->getConstructor();
+ if (Ctor->isDefaulted() && Ctor->isCopyOrMoveConstructor() &&
+ Ctor->isTrivial())
+ return true;
+ }
+
+ return false;
+}
+
template <class Emitter>
bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
assert(!ReturnType);
@@ -6071,10 +6088,7 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
const Record::Field *F = R->getField(Member);
LocOverrideScope<Emitter> LOS(this, SourceInfo{},
- !Init->isWritten() &&
- !Init->isInClassMemberInitializer() &&
- (!isa<CXXConstructExpr>(InitExpr) ||
- Member->isAnonymousStructOrUnion()));
+                                  initNeedsOverriddenLoc(Init));
if (!emitFieldInitializer(F, F->Offset, InitExpr, IsUnion))
return false;
} else if (const Type *Base = Init->getBaseClass()) {
@@ -6104,10 +6118,7 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
return false;
} else if (const IndirectFieldDecl *IFD = Init->getIndirectMember()) {
LocOverrideScope<Emitter> LOS(this, SourceInfo{},
- !Init->isWritten() &&
- !Init->isInClassMemberInitializer() &&
- !isa<CXXConstructExpr>(InitExpr));
-
+                                  initNeedsOverriddenLoc(Init));
assert(IFD->getChainingSize() >= 2);
unsigned NestedFieldOffset = 0;
diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp
index 7403e90..629c1ff 100644
--- a/clang/lib/AST/ByteCode/Descriptor.cpp
+++ b/clang/lib/AST/ByteCode/Descriptor.cpp
@@ -153,28 +153,6 @@ static void dtorArrayDesc(Block *B, std::byte *Ptr, const Descriptor *D) {
}
}
-static void moveArrayDesc(Block *B, std::byte *Src, std::byte *Dst,
- const Descriptor *D) {
- const unsigned NumElems = D->getNumElems();
- const unsigned ElemSize =
- D->ElemDesc->getAllocSize() + sizeof(InlineDescriptor);
-
- unsigned ElemOffset = 0;
- for (unsigned I = 0; I < NumElems; ++I, ElemOffset += ElemSize) {
- auto *SrcPtr = Src + ElemOffset;
- auto *DstPtr = Dst + ElemOffset;
-
- auto *SrcDesc = reinterpret_cast<InlineDescriptor *>(SrcPtr);
- auto *SrcElemLoc = reinterpret_cast<std::byte *>(SrcDesc + 1);
- auto *DstDesc = reinterpret_cast<InlineDescriptor *>(DstPtr);
- auto *DstElemLoc = reinterpret_cast<std::byte *>(DstDesc + 1);
-
- *DstDesc = *SrcDesc;
- if (auto Fn = D->ElemDesc->MoveFn)
- Fn(B, SrcElemLoc, DstElemLoc, D->ElemDesc);
- }
-}
-
static void initField(Block *B, std::byte *Ptr, bool IsConst, bool IsMutable,
bool IsVolatile, bool IsActive, bool IsUnionField,
bool InUnion, const Descriptor *D, unsigned FieldOffset) {
@@ -268,45 +246,6 @@ static void dtorRecord(Block *B, std::byte *Ptr, const Descriptor *D) {
destroyBase(B, Ptr, F.Desc, F.Offset);
}
-static void moveRecord(Block *B, std::byte *Src, std::byte *Dst,
- const Descriptor *D) {
- assert(D);
- assert(D->ElemRecord);
-
- // FIXME: Code duplication.
- for (const auto &F : D->ElemRecord->fields()) {
- auto FieldOffset = F.Offset;
- const auto *SrcDesc =
- reinterpret_cast<const InlineDescriptor *>(Src + FieldOffset) - 1;
- auto *DestDesc =
- reinterpret_cast<InlineDescriptor *>(Dst + FieldOffset) - 1;
- std::memcpy(DestDesc, SrcDesc, sizeof(InlineDescriptor));
-
- if (auto Fn = F.Desc->MoveFn)
- Fn(B, Src + FieldOffset, Dst + FieldOffset, F.Desc);
- }
-
- for (const auto &Base : D->ElemRecord->bases()) {
- auto BaseOffset = Base.Offset;
- const auto *SrcDesc =
- reinterpret_cast<const InlineDescriptor *>(Src + BaseOffset) - 1;
- auto *DestDesc = reinterpret_cast<InlineDescriptor *>(Dst + BaseOffset) - 1;
- std::memcpy(DestDesc, SrcDesc, sizeof(InlineDescriptor));
-
- if (auto Fn = Base.Desc->MoveFn)
- Fn(B, Src + BaseOffset, Dst + BaseOffset, Base.Desc);
- }
-
- for (const auto &VBase : D->ElemRecord->virtual_bases()) {
- auto VBaseOffset = VBase.Offset;
- const auto *SrcDesc =
- reinterpret_cast<const InlineDescriptor *>(Src + VBaseOffset) - 1;
- auto *DestDesc =
- reinterpret_cast<InlineDescriptor *>(Dst + VBaseOffset) - 1;
- std::memcpy(DestDesc, SrcDesc, sizeof(InlineDescriptor));
- }
-}
-
static BlockCtorFn getCtorPrim(PrimType Type) {
// Floating types are special. They are primitives, but need their
// constructor called.
@@ -337,18 +276,6 @@ static BlockDtorFn getDtorPrim(PrimType Type) {
COMPOSITE_TYPE_SWITCH(Type, return dtorTy<T>, return nullptr);
}
-static BlockMoveFn getMovePrim(PrimType Type) {
- if (Type == PT_Float)
- return moveTy<PrimConv<PT_Float>::T>;
- if (Type == PT_IntAP)
- return moveTy<PrimConv<PT_IntAP>::T>;
- if (Type == PT_IntAPS)
- return moveTy<PrimConv<PT_IntAPS>::T>;
- if (Type == PT_MemberPtr)
- return moveTy<PrimConv<PT_MemberPtr>::T>;
- COMPOSITE_TYPE_SWITCH(Type, return moveTy<T>, return nullptr);
-}
-
static BlockCtorFn getCtorArrayPrim(PrimType Type) {
TYPE_SWITCH(Type, return ctorArrayTy<T>);
llvm_unreachable("unknown Expr");
@@ -359,11 +286,6 @@ static BlockDtorFn getDtorArrayPrim(PrimType Type) {
llvm_unreachable("unknown Expr");
}
-static BlockMoveFn getMoveArrayPrim(PrimType Type) {
- TYPE_SWITCH(Type, return moveArrayTy<T>);
- llvm_unreachable("unknown Expr");
-}
-
/// Primitives.
Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy, PrimType Type,
MetadataSize MD, bool IsConst, bool IsTemporary,
@@ -372,7 +294,7 @@ Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy, PrimType Type,
MDSize(MD.value_or(0)), AllocSize(align(Size + MDSize)), PrimT(Type),
IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary),
IsVolatile(IsVolatile), CtorFn(getCtorPrim(Type)),
- DtorFn(getDtorPrim(Type)), MoveFn(getMovePrim(Type)) {
+ DtorFn(getDtorPrim(Type)) {
assert(AllocSize >= Size);
assert(Source && "Missing source");
}
@@ -386,7 +308,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD,
AllocSize(align(MDSize) + align(Size) + sizeof(InitMapPtr)), PrimT(Type),
IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary),
IsArray(true), CtorFn(getCtorArrayPrim(Type)),
- DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) {
+ DtorFn(getDtorArrayPrim(Type)) {
assert(Source && "Missing source");
assert(NumElems <= (MaxArrayElemBytes / ElemSize));
}
@@ -399,7 +321,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD,
AllocSize(MDSize + sizeof(InitMapPtr) + alignof(void *)), PrimT(Type),
IsConst(IsConst), IsMutable(false), IsTemporary(IsTemporary),
IsArray(true), CtorFn(getCtorArrayPrim(Type)),
- DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) {
+ DtorFn(getDtorArrayPrim(Type)) {
assert(Source && "Missing source");
}
@@ -414,7 +336,7 @@ Descriptor::Descriptor(const DeclTy &D, const Type *SourceTy,
AllocSize(std::max<size_t>(alignof(void *), Size) + MDSize),
ElemDesc(Elem), IsConst(IsConst), IsMutable(IsMutable),
IsTemporary(IsTemporary), IsArray(true), CtorFn(ctorArrayDesc),
- DtorFn(dtorArrayDesc), MoveFn(moveArrayDesc) {
+ DtorFn(dtorArrayDesc) {
assert(Source && "Missing source");
}
@@ -425,7 +347,7 @@ Descriptor::Descriptor(const DeclTy &D, const Descriptor *Elem, MetadataSize MD,
Size(UnknownSizeMark), MDSize(MD.value_or(0)),
AllocSize(MDSize + alignof(void *)), ElemDesc(Elem), IsConst(true),
IsMutable(false), IsTemporary(IsTemporary), IsArray(true),
- CtorFn(ctorArrayDesc), DtorFn(dtorArrayDesc), MoveFn(moveArrayDesc) {
+ CtorFn(ctorArrayDesc), DtorFn(dtorArrayDesc) {
assert(Source && "Missing source");
}
@@ -437,7 +359,7 @@ Descriptor::Descriptor(const DeclTy &D, const Record *R, MetadataSize MD,
Size(ElemSize), MDSize(MD.value_or(0)), AllocSize(Size + MDSize),
ElemRecord(R), IsConst(IsConst), IsMutable(IsMutable),
IsTemporary(IsTemporary), IsVolatile(IsVolatile), CtorFn(ctorRecord),
- DtorFn(dtorRecord), MoveFn(moveRecord) {
+ DtorFn(dtorRecord) {
assert(Source && "Missing source");
}
diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h
index 4c925f6..cd34e11 100644
--- a/clang/lib/AST/ByteCode/Descriptor.h
+++ b/clang/lib/AST/ByteCode/Descriptor.h
@@ -41,14 +41,6 @@ using BlockCtorFn = void (*)(Block *Storage, std::byte *FieldPtr, bool IsConst,
using BlockDtorFn = void (*)(Block *Storage, std::byte *FieldPtr,
const Descriptor *FieldDesc);
-/// Invoked when a block with pointers referencing it goes out of scope. Such
-/// blocks are persisted: the move function copies all inline descriptors and
-/// non-trivial fields, as existing pointers might need to reference those
-/// descriptors. Data is not copied since it cannot be legally read.
-using BlockMoveFn = void (*)(Block *Storage, std::byte *SrcFieldPtr,
- std::byte *DstFieldPtr,
- const Descriptor *FieldDesc);
-
enum class GlobalInitState {
Initialized,
NoInitializer,
@@ -181,7 +173,6 @@ public:
/// Storage management methods.
const BlockCtorFn CtorFn = nullptr;
const BlockDtorFn DtorFn = nullptr;
- const BlockMoveFn MoveFn = nullptr;
/// Allocates a descriptor for a primitive.
Descriptor(const DeclTy &D, const Type *SourceTy, PrimType Type,
diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp
index 14f99c7..9342192 100644
--- a/clang/lib/AST/ByteCode/InterpFrame.cpp
+++ b/clang/lib/AST/ByteCode/InterpFrame.cpp
@@ -133,6 +133,11 @@ static bool shouldSkipInBacktrace(const Function *F) {
MD && MD->getParent()->isAnonymousStructOrUnion())
return true;
+ if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(FD);
+ Ctor && Ctor->isDefaulted() && Ctor->isTrivial() &&
+ Ctor->isCopyOrMoveConstructor() && Ctor->inits().empty())
+ return true;
+
return false;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 34af9cc..3679327 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -9860,11 +9860,15 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr *E) {
if (Value.isInt()) {
unsigned Size = Info.Ctx.getTypeSize(E->getType());
uint64_t N = Value.getInt().extOrTrunc(Size).getZExtValue();
- Result.Base = (Expr*)nullptr;
- Result.InvalidBase = false;
- Result.Offset = CharUnits::fromQuantity(N);
- Result.Designator.setInvalid();
- Result.IsNullPtr = false;
+ if (N == Info.Ctx.getTargetNullPointerValue(E->getType())) {
+ Result.setNull(Info.Ctx, E->getType());
+ } else {
+ Result.Base = (Expr *)nullptr;
+ Result.InvalidBase = false;
+ Result.Offset = CharUnits::fromQuantity(N);
+ Result.Designator.setInvalid();
+ Result.IsNullPtr = false;
+ }
return true;
} else {
// In rare instances, the value isn't an lvalue.
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index f7a98bd..fe20004 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -329,10 +329,11 @@ OpenACCPrivateClause::Create(const ASTContext &C, SourceLocation BeginLoc,
OpenACCFirstPrivateClause *OpenACCFirstPrivateClause::Create(
const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
- ArrayRef<Expr *> VarList, ArrayRef<VarDecl *> InitRecipes,
+ ArrayRef<Expr *> VarList, ArrayRef<OpenACCFirstPrivateRecipe> InitRecipes,
SourceLocation EndLoc) {
- void *Mem =
- C.Allocate(OpenACCFirstPrivateClause::totalSizeToAlloc<Expr *, VarDecl *>(
+ void *Mem = C.Allocate(
+ OpenACCFirstPrivateClause::totalSizeToAlloc<Expr *,
+ OpenACCFirstPrivateRecipe>(
VarList.size(), InitRecipes.size()));
return new (Mem) OpenACCFirstPrivateClause(BeginLoc, LParenLoc, VarList,
InitRecipes, EndLoc);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 4c36f24..0297f9c 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2645,8 +2645,10 @@ void OpenACCClauseProfiler::VisitFirstPrivateClause(
const OpenACCFirstPrivateClause &Clause) {
VisitClauseWithVarList(Clause);
- for (auto *VD : Clause.getInitRecipes())
- Profiler.VisitDecl(VD);
+ for (auto &Recipe : Clause.getInitRecipes()) {
+ Profiler.VisitDecl(Recipe.RecipeDecl);
+ Profiler.VisitDecl(Recipe.InitFromTemporary);
+ }
}
void OpenACCClauseProfiler::VisitAttachClause(
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 40dff7e..f4ead3d 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -1986,6 +1986,14 @@ public:
const auto *FD = dyn_cast<FunctionDecl>(CE->getDirectCallee());
if (!FD)
return false;
+
+ bool IsGlobalAndNotInAnyNamespace =
+ FD->isGlobal() && !FD->getEnclosingNamespaceContext()->isNamespace();
+
+  // A libc function must either be in the std:: namespace or be a global
+  // function that is not in any namespace:
+ if (!FD->isInStdNamespace() && !IsGlobalAndNotInAnyNamespace)
+ return false;
auto isSingleStringLiteralArg = false;
if (CE->getNumArgs() == 1) {
isSingleStringLiteralArg =
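
Sketch of what the new namespace filter distinguishes (hypothetical declarations): only std:: members and global functions outside any namespace are treated as libc functions.

#include <cstdio>

namespace mylib { inline int printf(const char *, ...) { return 0; } }

void test() {
  mylib::printf("%d", 1); // user function: no longer treated as a libc call
  ::printf("%d", 1);      // global libc printf: still subject to the checks
}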
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
index 3aa170e..c22cf60 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -116,6 +116,15 @@ public:
mlir::Value emitPromotedComplexOperand(const Expr *e, QualType promotionTy);
+ LValue emitCompoundAssignLValue(
+ const CompoundAssignOperator *e,
+ mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &),
+ RValue &value);
+
+ mlir::Value emitCompoundAssign(
+ const CompoundAssignOperator *e,
+ mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &));
+
mlir::Value emitBinAdd(const BinOpInfo &op);
mlir::Value emitBinSub(const BinOpInfo &op);
mlir::Value emitBinMul(const BinOpInfo &op);
@@ -153,6 +162,15 @@ public:
HANDLEBINOP(Sub)
HANDLEBINOP(Mul)
#undef HANDLEBINOP
+
+ // Compound assignments.
+ mlir::Value VisitBinAddAssign(const CompoundAssignOperator *e) {
+ return emitCompoundAssign(e, &ComplexExprEmitter::emitBinAdd);
+ }
+
+ mlir::Value VisitBinSubAssign(const CompoundAssignOperator *e) {
+ return emitCompoundAssign(e, &ComplexExprEmitter::emitBinSub);
+ }
};
} // namespace
@@ -166,6 +184,12 @@ static const ComplexType *getComplexType(QualType type) {
}
#endif // NDEBUG
+static mlir::Value createComplexFromReal(CIRGenBuilderTy &builder,
+ mlir::Location loc, mlir::Value real) {
+ mlir::Value imag = builder.getNullValue(real.getType(), loc);
+ return builder.createComplexCreate(loc, real, imag);
+}
+
LValue ComplexExprEmitter::emitBinAssignLValue(const BinaryOperator *e,
mlir::Value &value) {
assert(cgf.getContext().hasSameUnqualifiedType(e->getLHS()->getType(),
@@ -602,7 +626,7 @@ mlir::Value ComplexExprEmitter::emitPromoted(const Expr *e,
mlir::Value result = Visit(const_cast<Expr *>(e));
if (!promotionTy.isNull())
- cgf.cgm.errorNYI("emitPromoted emitPromotedValue");
+ return cgf.emitPromotedValue(result, promotionTy);
return result;
}
@@ -630,6 +654,104 @@ ComplexExprEmitter::emitBinOps(const BinaryOperator *e, QualType promotionTy) {
return binOpInfo;
}
+LValue ComplexExprEmitter::emitCompoundAssignLValue(
+ const CompoundAssignOperator *e,
+ mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &), RValue &value) {
+ QualType lhsTy = e->getLHS()->getType();
+ QualType rhsTy = e->getRHS()->getType();
+ SourceLocation exprLoc = e->getExprLoc();
+ mlir::Location loc = cgf.getLoc(exprLoc);
+
+ if (lhsTy->getAs<AtomicType>()) {
+    cgf.cgm.errorNYI("emitCompoundAssignLValue AtomicType");
+ return {};
+ }
+
+ BinOpInfo opInfo{loc};
+ opInfo.fpFeatures = e->getFPFeaturesInEffect(cgf.getLangOpts());
+
+ assert(!cir::MissingFeatures::cgFPOptionsRAII());
+
+ // Load the RHS and LHS operands.
+ // __block variables need to have the rhs evaluated first, plus this should
+ // improve codegen a little.
+ QualType promotionTypeCR = getPromotionType(e->getComputationResultType());
+ opInfo.ty = promotionTypeCR.isNull() ? e->getComputationResultType()
+ : promotionTypeCR;
+
+ QualType complexElementTy =
+ opInfo.ty->castAs<ComplexType>()->getElementType();
+ QualType promotionTypeRHS = getPromotionType(rhsTy);
+
+ // The RHS should have been converted to the computation type.
+ if (e->getRHS()->getType()->isRealFloatingType()) {
+ if (!promotionTypeRHS.isNull()) {
+ opInfo.rhs = createComplexFromReal(
+ cgf.getBuilder(), loc,
+ cgf.emitPromotedScalarExpr(e->getRHS(), promotionTypeRHS));
+ } else {
+ assert(cgf.getContext().hasSameUnqualifiedType(complexElementTy, rhsTy));
+ opInfo.rhs = createComplexFromReal(cgf.getBuilder(), loc,
+ cgf.emitScalarExpr(e->getRHS()));
+ }
+ } else {
+ if (!promotionTypeRHS.isNull()) {
+ opInfo.rhs = cgf.emitPromotedComplexExpr(e->getRHS(), promotionTypeRHS);
+ } else {
+ assert(cgf.getContext().hasSameUnqualifiedType(opInfo.ty, rhsTy));
+ opInfo.rhs = Visit(e->getRHS());
+ }
+ }
+
+ LValue lhs = cgf.emitLValue(e->getLHS());
+
+ // Load from the l-value and convert it.
+ QualType promotionTypeLHS = getPromotionType(e->getComputationLHSType());
+ if (lhsTy->isAnyComplexType()) {
+ mlir::Value lhsValue = emitLoadOfLValue(lhs, exprLoc);
+ QualType destTy = promotionTypeLHS.isNull() ? opInfo.ty : promotionTypeLHS;
+ opInfo.lhs = emitComplexToComplexCast(lhsValue, lhsTy, destTy, exprLoc);
+ } else {
+ cgf.cgm.errorNYI("emitCompoundAssignLValue emitLoadOfScalar");
+ return {};
+ }
+
+ // Expand the binary operator.
+ mlir::Value result = (this->*func)(opInfo);
+
+ // Truncate the result and store it into the LHS lvalue.
+ if (lhsTy->isAnyComplexType()) {
+ mlir::Value resultValue =
+ emitComplexToComplexCast(result, opInfo.ty, lhsTy, exprLoc);
+ emitStoreOfComplex(loc, resultValue, lhs, /*isInit*/ false);
+ value = RValue::getComplex(resultValue);
+ } else {
+ mlir::Value resultValue =
+ cgf.emitComplexToScalarConversion(result, opInfo.ty, lhsTy, exprLoc);
+ cgf.emitStoreOfScalar(resultValue, lhs, /*isInit*/ false);
+ value = RValue::get(resultValue);
+ }
+
+ return lhs;
+}
+
+mlir::Value ComplexExprEmitter::emitCompoundAssign(
+ const CompoundAssignOperator *e,
+ mlir::Value (ComplexExprEmitter::*func)(const BinOpInfo &)) {
+ RValue val;
+ LValue lv = emitCompoundAssignLValue(e, func, val);
+
+ // The result of an assignment in C is the assigned r-value.
+ if (!cgf.getLangOpts().CPlusPlus)
+ return val.getComplexValue();
+
+ // If the lvalue is non-volatile, return the computed value of the assignment.
+ if (!lv.isVolatileQualified())
+ return val.getComplexValue();
+
+ return emitLoadOfLValue(lv, e->getExprLoc());
+}
+
mlir::Value ComplexExprEmitter::emitBinAdd(const BinOpInfo &op) {
assert(!cir::MissingFeatures::fastMathFlags());
assert(!cir::MissingFeatures::cgFPOptionsRAII());
@@ -685,6 +807,31 @@ mlir::Value CIRGenFunction::emitComplexExpr(const Expr *e) {
return ComplexExprEmitter(*this).Visit(const_cast<Expr *>(e));
}
+using CompoundFunc =
+ mlir::Value (ComplexExprEmitter::*)(const ComplexExprEmitter::BinOpInfo &);
+
+static CompoundFunc getComplexOp(BinaryOperatorKind op) {
+ switch (op) {
+ case BO_MulAssign:
+ llvm_unreachable("getComplexOp: BO_MulAssign");
+ case BO_DivAssign:
+ llvm_unreachable("getComplexOp: BO_DivAssign");
+ case BO_SubAssign:
+ return &ComplexExprEmitter::emitBinSub;
+ case BO_AddAssign:
+ return &ComplexExprEmitter::emitBinAdd;
+ default:
+ llvm_unreachable("unexpected complex compound assignment");
+ }
+}
+
+LValue CIRGenFunction::emitComplexCompoundAssignmentLValue(
+ const CompoundAssignOperator *e) {
+ CompoundFunc op = getComplexOp(e->getOpcode());
+ RValue val;
+ return ComplexExprEmitter(*this).emitCompoundAssignLValue(e, op, val);
+}
+
mlir::Value CIRGenFunction::emitComplexPrePostIncDec(const UnaryOperator *e,
LValue lv,
cir::UnaryOpKind op,
@@ -729,3 +876,11 @@ mlir::Value CIRGenFunction::emitPromotedComplexExpr(const Expr *e,
QualType promotionType) {
return ComplexExprEmitter(*this).emitPromoted(e, promotionType);
}
+
+mlir::Value CIRGenFunction::emitPromotedValue(mlir::Value result,
+ QualType promotionType) {
+ assert(!mlir::cast<cir::ComplexType>(result.getType()).isIntegerComplex() &&
+ "integral complex will never be promoted");
+ return builder.createCast(cir::CastKind::float_complex, result,
+ convertType(promotionType));
+}
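
In source terms, a sketch of what the new path accepts: += and -= on complex operands now lower through emitCompoundAssign, while *= and /= still hit the llvm_unreachable in getComplexOp above.

void f(float _Complex &a, float _Complex b) {
  a += b; // BO_AddAssign -> emitBinAdd
  a -= b; // BO_SubAssign -> emitBinSub
}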
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 32c1c1a..3e06513 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1955,6 +1955,29 @@ mlir::Value CIRGenFunction::emitScalarConversion(mlir::Value src,
.emitScalarConversion(src, srcTy, dstTy, loc);
}
+mlir::Value CIRGenFunction::emitComplexToScalarConversion(mlir::Value src,
+ QualType srcTy,
+ QualType dstTy,
+ SourceLocation loc) {
+ assert(srcTy->isAnyComplexType() && hasScalarEvaluationKind(dstTy) &&
+ "Invalid complex -> scalar conversion");
+
+ QualType complexElemTy = srcTy->castAs<ComplexType>()->getElementType();
+ if (dstTy->isBooleanType()) {
+ auto kind = complexElemTy->isFloatingType()
+ ? cir::CastKind::float_complex_to_bool
+ : cir::CastKind::int_complex_to_bool;
+ return builder.createCast(getLoc(loc), kind, src, convertType(dstTy));
+ }
+
+ auto kind = complexElemTy->isFloatingType()
+ ? cir::CastKind::float_complex_to_real
+ : cir::CastKind::int_complex_to_real;
+ mlir::Value real =
+ builder.createCast(getLoc(loc), kind, src, convertType(complexElemTy));
+ return emitScalarConversion(real, complexElemTy, dstTy, loc);
+}
+
mlir::Value ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *e) {
// Perform vector logical not on comparison with zero vector.
if (e->getType()->isVectorType() &&
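
The conversions emitComplexToScalarConversion implements, shown at the source level as a sketch (using Clang's _Complex extension in C++):

void g(float _Complex c) {
  bool b = (bool)c;   // complex -> bool: true if either component is nonzero
  float f = (float)c; // complex -> real: take the real part, then convert
  (void)b;
  (void)f;
}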
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index eb05c93..e93dc0b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -785,9 +785,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
}
if (!ty->isAnyComplexType())
return emitCompoundAssignmentLValue(cast<CompoundAssignOperator>(e));
- cgm.errorNYI(e->getSourceRange(),
- "CompoundAssignOperator with ComplexType");
- return LValue();
+
+ return emitComplexCompoundAssignmentLValue(cast<CompoundAssignOperator>(e));
}
case Expr::CallExprClass:
case Expr::CXXMemberCallExprClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 3d92545..2e60cfc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -944,6 +944,11 @@ public:
/// sanitizer is enabled, a runtime check is also emitted.
mlir::Value emitCheckedArgForAssume(const Expr *e);
+ /// Emit a conversion from the specified complex type to the specified
+ /// destination type, where the destination type is an LLVM scalar type.
+ mlir::Value emitComplexToScalarConversion(mlir::Value src, QualType srcTy,
+ QualType dstTy, SourceLocation loc);
+
LValue emitCompoundAssignmentLValue(const clang::CompoundAssignOperator *e);
LValue emitCompoundLiteralLValue(const CompoundLiteralExpr *e);
@@ -1047,6 +1052,8 @@ public:
mlir::Value emitPromotedScalarExpr(const Expr *e, QualType promotionType);
+ mlir::Value emitPromotedValue(mlir::Value result, QualType promotionType);
+
/// Emit the computation of the specified expression of scalar type.
mlir::Value emitScalarExpr(const clang::Expr *e);
@@ -1076,6 +1083,7 @@ public:
cir::UnaryOpKind op, bool isPre);
LValue emitComplexAssignmentLValue(const BinaryOperator *e);
+ LValue emitComplexCompoundAssignmentLValue(const CompoundAssignOperator *e);
void emitCompoundStmt(const clang::CompoundStmt &s);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index b143682..425250d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1307,7 +1307,8 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) {
}
case Decl::Var:
- case Decl::Decomposition: {
+ case Decl::Decomposition:
+ case Decl::VarTemplateSpecialization: {
auto *vd = cast<VarDecl>(decl);
if (isa<DecompositionDecl>(decl)) {
errorNYI(decl->getSourceRange(), "global variable decompositions");
@@ -1342,6 +1343,8 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) {
case Decl::StaticAssert:
case Decl::TypeAliasTemplate:
case Decl::UsingShadow:
+ case Decl::VarTemplate:
+ case Decl::VarTemplatePartialSpecialization:
break;
case Decl::CXXConstructor:
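
The newly handled declaration kinds correspond to source like this (a sketch; only the Decl kind names come from the change above):

template <typename T> T var = T(10); // Decl::VarTemplate: skipped here
template <> int var<int> = 42;       // Decl::VarTemplateSpecialization: emitted
int use() { return var<int>; }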
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index 96fe046..2dd320d 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -294,14 +294,22 @@ public:
Address Value,
QualType SrcRecordTy) = 0;
+ struct ExactDynamicCastInfo {
+ bool RequiresCastToPrimaryBase;
+ CharUnits Offset;
+ };
+
+ virtual std::optional<ExactDynamicCastInfo>
+ getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy,
+ QualType DestRecordTy) = 0;
+
/// Emit a dynamic_cast from SrcRecordTy to DestRecordTy. The cast fails if
/// the dynamic type of Value is not exactly DestRecordTy.
- virtual llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address Value,
- QualType SrcRecordTy,
- QualType DestTy,
- QualType DestRecordTy,
- llvm::BasicBlock *CastSuccess,
- llvm::BasicBlock *CastFail) = 0;
+ virtual llvm::Value *emitExactDynamicCast(
+ CodeGenFunction &CGF, Address Value, QualType SrcRecordTy,
+ QualType DestTy, QualType DestRecordTy,
+ const ExactDynamicCastInfo &CastInfo, llvm::BasicBlock *CastSuccess,
+ llvm::BasicBlock *CastFail) = 0;
virtual bool EmitBadCastCall(CodeGenFunction &CGF) = 0;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5a3d4e4..ed35a05 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3789,33 +3789,50 @@ void CodeGenFunction::EmitCheck(
Branch->setMetadata(llvm::LLVMContext::MD_prof, Node);
EmitBlock(Handlers);
+ // Clear arguments for the MinimalRuntime handler.
+ if (CGM.getCodeGenOpts().SanitizeMinimalRuntime) {
+ switch (CheckHandler) {
+ case SanitizerHandler::TypeMismatch:
+ // Pass value pointer only. It adds minimal overhead.
+ StaticArgs = {};
+ assert(DynamicArgs.size() == 1);
+ break;
+ default:
+ // No arguments for other checks.
+ StaticArgs = {};
+ DynamicArgs = {};
+ break;
+ }
+ }
+
// Handler functions take an i8* pointing to the (handler-specific) static
// information block, followed by a sequence of intptr_t arguments
// representing operand values.
SmallVector<llvm::Value *, 4> Args;
SmallVector<llvm::Type *, 4> ArgTypes;
- if (!CGM.getCodeGenOpts().SanitizeMinimalRuntime) {
- Args.reserve(DynamicArgs.size() + 1);
- ArgTypes.reserve(DynamicArgs.size() + 1);
-
- // Emit handler arguments and create handler function type.
- if (!StaticArgs.empty()) {
- llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
- auto *InfoPtr = new llvm::GlobalVariable(
- CGM.getModule(), Info->getType(), false,
- llvm::GlobalVariable::PrivateLinkage, Info, "", nullptr,
- llvm::GlobalVariable::NotThreadLocal,
- CGM.getDataLayout().getDefaultGlobalsAddressSpace());
- InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
- Args.push_back(InfoPtr);
- ArgTypes.push_back(Args.back()->getType());
- }
- for (llvm::Value *DynamicArg : DynamicArgs) {
- Args.push_back(EmitCheckValue(DynamicArg));
- ArgTypes.push_back(IntPtrTy);
- }
+ Args.reserve(DynamicArgs.size() + 1);
+ ArgTypes.reserve(DynamicArgs.size() + 1);
+
+ // Emit handler arguments and create handler function type.
+ if (!StaticArgs.empty()) {
+ llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
+ auto *InfoPtr = new llvm::GlobalVariable(
+ CGM.getModule(), Info->getType(),
+ // Non-constant global is used in a handler to deduplicate reports.
+ // TODO: change deduplication logic and make it constant.
+ /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage, Info, "",
+ nullptr, llvm::GlobalVariable::NotThreadLocal,
+ CGM.getDataLayout().getDefaultGlobalsAddressSpace());
+ InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
+ Args.push_back(InfoPtr);
+ ArgTypes.push_back(Args.back()->getType());
+ }
+
+ for (llvm::Value *DynamicArg : DynamicArgs) {
+ Args.push_back(EmitCheckValue(DynamicArg));
+ ArgTypes.push_back(IntPtrTy);
}
llvm::FunctionType *FnType =
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index fef1baf..49d5d8a 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -2295,6 +2295,18 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
CGM.getCXXABI().shouldEmitExactDynamicCast(DestRecordTy) &&
!getLangOpts().PointerAuthCalls;
+ std::optional<CGCXXABI::ExactDynamicCastInfo> ExactCastInfo;
+ if (IsExact) {
+ ExactCastInfo = CGM.getCXXABI().getExactDynamicCastInfo(SrcRecordTy, DestTy,
+ DestRecordTy);
+ if (!ExactCastInfo) {
+ llvm::Value *NullValue = EmitDynamicCastToNull(*this, DestTy);
+ if (!Builder.GetInsertBlock())
+ EmitBlock(createBasicBlock("dynamic_cast.unreachable"));
+ return NullValue;
+ }
+ }
+
// C++ [expr.dynamic.cast]p4:
// If the value of v is a null pointer value in the pointer case, the result
// is the null pointer value of type T.
@@ -2322,7 +2334,8 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
// If the destination type is effectively final, this pointer points to the
// right type if and only if its vptr has the right value.
Value = CGM.getCXXABI().emitExactDynamicCast(
- *this, ThisAddr, SrcRecordTy, DestTy, DestRecordTy, CastEnd, CastNull);
+ *this, ThisAddr, SrcRecordTy, DestTy, DestRecordTy, *ExactCastInfo,
+ CastEnd, CastNull);
} else {
assert(DestRecordTy->isRecordType() &&
"destination type must be a record type!");
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index aae1481..5ffc1ed 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -226,6 +226,10 @@ public:
return hasUniqueVTablePointer(DestRecordTy);
}
+ std::optional<ExactDynamicCastInfo>
+ getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy,
+ QualType DestRecordTy) override;
+
llvm::Value *emitDynamicCastCall(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
@@ -234,6 +238,7 @@ public:
llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address ThisAddr,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
+ const ExactDynamicCastInfo &CastInfo,
llvm::BasicBlock *CastSuccess,
llvm::BasicBlock *CastFail) override;
@@ -1681,10 +1686,11 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastCall(
return Value;
}
-llvm::Value *ItaniumCXXABI::emitExactDynamicCast(
- CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy,
- QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastSuccess,
- llvm::BasicBlock *CastFail) {
+std::optional<CGCXXABI::ExactDynamicCastInfo>
+ItaniumCXXABI::getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy,
+ QualType DestRecordTy) {
+ assert(shouldEmitExactDynamicCast(DestRecordTy));
+
ASTContext &Context = getContext();
// Find all the inheritance paths.
@@ -1722,41 +1728,56 @@ llvm::Value *ItaniumCXXABI::emitExactDynamicCast(
if (!Offset)
Offset = PathOffset;
else if (Offset != PathOffset) {
- // Base appears in at least two different places. Find the most-derived
- // object and see if it's a DestDecl. Note that the most-derived object
- // must be at least as aligned as this base class subobject, and must
- // have a vptr at offset 0.
- ThisAddr = Address(emitDynamicCastToVoid(CGF, ThisAddr, SrcRecordTy),
- CGF.VoidPtrTy, ThisAddr.getAlignment());
- SrcDecl = DestDecl;
- Offset = CharUnits::Zero();
- break;
+ // Base appears in at least two different places.
+ return ExactDynamicCastInfo{/*RequiresCastToPrimaryBase=*/true,
+ CharUnits::Zero()};
}
}
+ if (!Offset)
+ return std::nullopt;
+ return ExactDynamicCastInfo{/*RequiresCastToPrimaryBase=*/false, *Offset};
+}
- if (!Offset) {
- // If there are no public inheritance paths, the cast always fails.
- CGF.EmitBranch(CastFail);
- return llvm::PoisonValue::get(CGF.VoidPtrTy);
- }
+llvm::Value *ItaniumCXXABI::emitExactDynamicCast(
+ CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy,
+ QualType DestTy, QualType DestRecordTy,
+ const ExactDynamicCastInfo &ExactCastInfo, llvm::BasicBlock *CastSuccess,
+ llvm::BasicBlock *CastFail) {
+ const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
+ const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl();
+
+ llvm::Value *VTable = nullptr;
+ if (ExactCastInfo.RequiresCastToPrimaryBase) {
+ // Base appears in at least two different places. Find the most-derived
+ // object and see if it's a DestDecl. Note that the most-derived object
+ // must be at least as aligned as this base class subobject, and must
+ // have a vptr at offset 0.
+ llvm::Value *PrimaryBase =
+ emitDynamicCastToVoid(CGF, ThisAddr, SrcRecordTy);
+ ThisAddr = Address(PrimaryBase, CGF.VoidPtrTy, ThisAddr.getAlignment());
+ SrcDecl = DestDecl;
+ Address VTablePtrPtr = ThisAddr.withElementType(CGF.VoidPtrPtrTy);
+ VTable = CGF.Builder.CreateLoad(VTablePtrPtr, "vtable");
+  } else {
+    VTable = CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, SrcDecl);
+  }
// Compare the vptr against the expected vptr for the destination type at
- // this offset. Note that we do not know what type ThisAddr points to in
- // the case where the derived class multiply inherits from the base class
- // so we can't use GetVTablePtr, so we load the vptr directly instead.
- llvm::Instruction *VPtr = CGF.Builder.CreateLoad(
- ThisAddr.withElementType(CGF.VoidPtrPtrTy), "vtable");
- CGM.DecorateInstructionWithTBAA(
- VPtr, CGM.getTBAAVTablePtrAccessInfo(CGF.VoidPtrPtrTy));
- llvm::Value *Success = CGF.Builder.CreateICmpEQ(
- VPtr, getVTableAddressPoint(BaseSubobject(SrcDecl, *Offset), DestDecl));
- llvm::Value *Result = ThisAddr.emitRawPointer(CGF);
- if (!Offset->isZero())
- Result = CGF.Builder.CreateInBoundsGEP(
- CGF.CharTy, Result,
- {llvm::ConstantInt::get(CGF.PtrDiffTy, -Offset->getQuantity())});
+ // this offset.
+ llvm::Constant *ExpectedVTable = getVTableAddressPoint(
+ BaseSubobject(SrcDecl, ExactCastInfo.Offset), DestDecl);
+ llvm::Value *Success = CGF.Builder.CreateICmpEQ(VTable, ExpectedVTable);
+ llvm::Value *AdjustedThisPtr = ThisAddr.emitRawPointer(CGF);
+
+ if (!ExactCastInfo.Offset.isZero()) {
+ CharUnits::QuantityType Offset = ExactCastInfo.Offset.getQuantity();
+ llvm::Constant *OffsetConstant =
+ llvm::ConstantInt::get(CGF.PtrDiffTy, -Offset);
+ AdjustedThisPtr = CGF.Builder.CreateInBoundsGEP(CGF.CharTy, AdjustedThisPtr,
+ OffsetConstant);
+ }
+
CGF.Builder.CreateCondBr(Success, CastSuccess, CastFail);
- return Result;
+ return AdjustedThisPtr;
}
llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF,
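
A sketch of the repeated-base case that RequiresCastToPrimaryBase encodes (hypothetical types): the source class occurs at two different offsets within the destination, so the remaining exact check is against the most-derived object's vptr at offset zero.

struct B { virtual ~B() = default; };
struct M1 : B {};
struct M2 : B {};
struct D final : M1, M2 {}; // B appears at two different offsets inside D

D *cast(B *p) { return dynamic_cast<D *>(p); } // compares the primary vptr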
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 700ffa4..e8d2451 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -158,9 +158,15 @@ public:
// TODO: Add support for exact dynamic_casts.
return false;
}
+ std::optional<ExactDynamicCastInfo>
+ getExactDynamicCastInfo(QualType SrcRecordTy, QualType DestTy,
+ QualType DestRecordTy) override {
+ llvm_unreachable("unsupported");
+ }
llvm::Value *emitExactDynamicCast(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
+ const ExactDynamicCastInfo &CastInfo,
llvm::BasicBlock *CastSuccess,
llvm::BasicBlock *CastFail) override {
llvm_unreachable("unsupported");
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 586f287..8c0bba9 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -1012,6 +1012,7 @@ inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) {
Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index,
C.getArgs().MakeArgString(Triple.split("-").first),
C.getArgs().MakeArgString("--offload-arch=" + Arch));
+ A->claim();
C.getArgs().append(A);
C.getArgs().AddSynthesizedArg(A);
Triples.insert(Triple);
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 25c6b5a..7667dbd 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -855,17 +855,30 @@ void ToolChain::addFortranRuntimeLibs(const ArgList &Args,
void ToolChain::addFortranRuntimeLibraryPath(const llvm::opt::ArgList &Args,
ArgStringList &CmdArgs) const {
- // Default to the <driver-path>/../lib directory. This works fine on the
- // platforms that we have tested so far. We will probably have to re-fine
- // this in the future. In particular, on some platforms, we may need to use
- // lib64 instead of lib.
+ auto AddLibSearchPathIfExists = [&](const Twine &Path) {
+    // The linker may emit warnings about nonexistent directories
+ if (!llvm::sys::fs::is_directory(Path))
+ return;
+
+ if (getTriple().isKnownWindowsMSVCEnvironment())
+ CmdArgs.push_back(Args.MakeArgString("-libpath:" + Path));
+ else
+ CmdArgs.push_back(Args.MakeArgString("-L" + Path));
+ };
+
+  // Search for flang_rt.* at the same location as clang_rt.*, as used with
+  // LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=0. On most platforms, flang_rt is
+  // located at the path returned by getRuntimePath(), which is already added
+  // to the library search path; this exception is mainly for Apple-Darwin.
+ AddLibSearchPathIfExists(getCompilerRTPath());
+
+ // Fall back to the non-resource directory <driver-path>/../lib. We will
+ // probably have to refine this in the future. In particular, on some
+ // platforms, we may need to use lib64 instead of lib.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(getDriver().Dir);
llvm::sys::path::append(DefaultLibPath, "lib");
- if (getTriple().isKnownWindowsMSVCEnvironment())
- CmdArgs.push_back(Args.MakeArgString("-libpath:" + DefaultLibPath));
- else
- CmdArgs.push_back(Args.MakeArgString("-L" + DefaultLibPath));
+ AddLibSearchPathIfExists(DefaultLibPath);
}
void ToolChain::addFlangRTLibPath(const ArgList &Args,
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index d64290f..9f99edad 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -1485,6 +1485,14 @@ static bool compileModuleAndReadASTImpl(CompilerInstance &ImportingInstance,
return false;
}
+  // The module was built successfully, so we can update its timestamp now.
+ if (ImportingInstance.getPreprocessor()
+ .getHeaderSearchInfo()
+ .getHeaderSearchOpts()
+ .ModulesValidateOncePerBuildSession) {
+ ImportingInstance.getModuleCache().updateModuleTimestamp(ModuleFileName);
+ }
+
return readASTAfterCompileModule(ImportingInstance, ImportLoc, ModuleNameLoc,
Module, ModuleFileName,
/*OutOfDate=*/nullptr, /*Missing=*/nullptr);
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index dc9fc07..3c3a3d1 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -31,6 +31,14 @@
__min_vector_width__(128)))
#endif
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
/* SSE4 Multiple Packed Sums of Absolute Difference. */
/// Computes sixteen sum of absolute difference (SAD) operations on sets of
/// four unsigned 8-bit integers from the 256-bit integer vectors \a X and
@@ -460,7 +468,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_and_si256(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a & (__v4du)__b);
@@ -478,7 +486,7 @@ _mm256_and_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_andnot_si256(__m256i __a, __m256i __b)
{
return (__m256i)(~(__v4du)__a & (__v4du)__b);
@@ -1822,7 +1830,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_or_si256(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a | (__v4du)__b);
@@ -2974,7 +2982,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_xor_si256(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a ^ (__v4du)__b);
@@ -5289,5 +5297,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#endif /* __AVX2INTRIN_H */
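
With the constexpr variants in place, these bitwise ops can fold at compile time in C++11 and later (compile with -mavx2). A minimal sketch; the TEST_CONSTEXPR macro in builtin_test_helpers.h exercises the same property.

#include <immintrin.h>

// __m256i is a vector of four 64-bit lanes; brace initialization of vector
// types is a Clang extension.
constexpr __m256i a = {12, 12, 12, 12};
constexpr __m256i b = {10, 10, 10, 10};
constexpr __m256i c = _mm256_and_si256(a, b); // folds to {8, 8, 8, 8}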
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 88b48e3..62325b9 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -20,6 +20,14 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512dq,no-evex512")))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
{
@@ -167,7 +175,7 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A ^ (__v8du)__B);
}
@@ -186,7 +194,7 @@ _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_ps (__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A ^ (__v16su)__B);
}
@@ -205,7 +213,7 @@ _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A | (__v8du)__B);
}
@@ -224,7 +232,7 @@ _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_ps(__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A | (__v16su)__B);
}
@@ -243,7 +251,7 @@ _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_pd(__m512d __A, __m512d __B) {
return (__m512d)((__v8du)__A & (__v8du)__B);
}
@@ -262,7 +270,7 @@ _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_ps(__m512 __A, __m512 __B) {
return (__m512)((__v16su)__A & (__v16su)__B);
}
@@ -281,7 +289,7 @@ _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_pd(__m512d __A, __m512d __B) {
return (__m512d)(~(__v8du)__A & (__v8du)__B);
}
@@ -300,7 +308,7 @@ _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_ps(__m512 __A, __m512 __B) {
return (__m512)(~(__v16su)__A & (__v16su)__B);
}
@@ -1375,5 +1383,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 45e7eeb..440552c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -645,7 +645,7 @@ _mm512_zextsi256_si512(__m256i __a)
}
/* Bitwise operators */
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi32(__m512i __a, __m512i __b)
{
return (__m512i)((__v16su)__a & (__v16su)__b);
@@ -666,7 +666,7 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
__k, __a, __b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_epi64(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a & (__v8du)__b);
@@ -687,13 +687,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
__k, __a, __b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
return (__m512i)(~(__v8du)__A & (__v8du)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
return (__m512i)(~(__v16su)__A & (__v16su)__B);
@@ -714,7 +714,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
__U, __A, __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
return (__m512i)(~(__v8du)__A & (__v8du)__B);
@@ -735,7 +735,7 @@ _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
__U, __A, __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi32(__m512i __a, __m512i __b)
{
return (__m512i)((__v16su)__a | (__v16su)__b);
@@ -755,7 +755,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_epi64(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a | (__v8du)__b);
@@ -775,7 +775,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
return (__m512i)((__v16su)__a ^ (__v16su)__b);
@@ -795,7 +795,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a ^ (__v8du)__b);
@@ -815,19 +815,19 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_and_si512(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a & (__v8du)__b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_or_si512(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a | (__v8du)__b);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_xor_si512(__m512i __a, __m512i __b)
{
return (__m512i)((__v8du)__a ^ (__v8du)__b);
@@ -5303,7 +5303,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
(__mmask8) __U);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_movedup_pd (__m512d __A)
{
return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
@@ -8665,7 +8665,7 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
_mm512_setzero_si512());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_movehdup_ps (__m512 __A)
{
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
@@ -8688,7 +8688,7 @@ _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_moveldup_ps (__m512 __A)
{
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
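The attribute swap above is what lets these AVX-512 bitwise and shuffle
intrinsics participate in constant evaluation. A minimal sketch of what this
enables, assuming C++14 or later, -mavx512f, and clang's constexpr support for
vector subscripting (this is an illustration, not a test from this patch):

    #include <immintrin.h>

    // Sketch only: __m512i is a vector of 8 x long long, so element 0 holds
    // the low 64 bits of the result.
    constexpr __m512i A = {0x0F, 0, 0, 0, 0, 0, 0, 0};
    constexpr __m512i B = {0x3C, 0, 0, 0, 0, 0, 0, 0};
    constexpr __m512i C = _mm512_and_si512(A, B);
    static_assert(C[0] == 0x0C, "0x0F & 0x3C folds to 0x0C at compile time");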
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 8e497a9..1da50f0 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -555,7 +555,7 @@ _mm256_rcp_ps(__m256 __a)
/// A 256-bit vector of [4 x double] containing one of the source operands.
/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
/// values between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_and_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4du)__a & (__v4du)__b);
@@ -573,7 +573,7 @@ _mm256_and_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing one of the source operands.
/// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
/// values between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_and_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a & (__v8su)__b);
@@ -594,7 +594,7 @@ _mm256_and_ps(__m256 __a, __m256 __b)
/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
/// values of the second operand and the one's complement of the first
/// operand.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_andnot_pd(__m256d __a, __m256d __b)
{
return (__m256d)(~(__v4du)__a & (__v4du)__b);
@@ -615,7 +615,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b)
/// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
/// values of the second operand and the one's complement of the first
/// operand.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_andnot_ps(__m256 __a, __m256 __b)
{
return (__m256)(~(__v8su)__a & (__v8su)__b);
@@ -633,7 +633,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b)
/// A 256-bit vector of [4 x double] containing one of the source operands.
/// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
/// values between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_or_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4du)__a | (__v4du)__b);
@@ -651,7 +651,7 @@ _mm256_or_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing one of the source operands.
/// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
/// values between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_or_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a | (__v8su)__b);
@@ -669,7 +669,7 @@ _mm256_or_ps(__m256 __a, __m256 __b)
/// A 256-bit vector of [4 x double] containing one of the source operands.
/// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
/// values between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_xor_pd(__m256d __a, __m256d __b)
{
return (__m256d)((__v4du)__a ^ (__v4du)__b);
@@ -687,7 +687,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing one of the source operands.
/// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
/// values between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_xor_ps(__m256 __a, __m256 __b)
{
return (__m256)((__v8su)__a ^ (__v8su)__b);
@@ -2392,7 +2392,7 @@ _mm256_cvtss_f32(__m256 __a)
/// return value.
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
/// values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movehdup_ps(__m256 __a)
{
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
@@ -2417,7 +2417,7 @@ _mm256_movehdup_ps(__m256 __a)
/// return value.
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
/// values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_moveldup_ps(__m256 __a)
{
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
@@ -2439,7 +2439,7 @@ _mm256_moveldup_ps(__m256 __a)
/// the return value.
/// \returns A 256-bit vector of [4 x double] containing the moved and
/// duplicated values.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movedup_pd(__m256d __a)
{
return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
@@ -3777,7 +3777,7 @@ _mm256_set_ps(float __a, float __b, float __c, float __d,
/// \param __i7
/// A 32-bit integral value used to initialize bits [31:0] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
int __i4, int __i5, int __i6, int __i7)
{
@@ -3825,7 +3825,7 @@ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
/// \param __w00
/// A 16-bit integral value used to initialize bits [15:0] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
short __w11, short __w10, short __w09, short __w08,
short __w07, short __w06, short __w05, short __w04,
@@ -3908,7 +3908,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
/// \param __b00
/// An 8-bit integral value used to initialize bits [7:0] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
char __b27, char __b26, char __b25, char __b24,
char __b23, char __b22, char __b21, char __b20,
@@ -3943,7 +3943,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
/// \param __d
/// A 64-bit integral value used to initialize bits [63:0] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
{
return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };
@@ -4044,7 +4044,7 @@ _mm256_setr_ps(float __a, float __b, float __c, float __d,
/// \param __i7
/// A 32-bit integral value used to initialize bits [255:224] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
int __i4, int __i5, int __i6, int __i7)
{
@@ -4092,7 +4092,7 @@ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
/// \param __w00
/// A 16-bit integral value used to initialize bits [255:240] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
short __w11, short __w10, short __w09, short __w08,
short __w07, short __w06, short __w05, short __w04,
@@ -4177,7 +4177,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
/// \param __b00
/// An 8-bit integral value used to initialize bits [255:248] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
char __b27, char __b26, char __b25, char __b24,
char __b23, char __b22, char __b21, char __b20,
@@ -4210,7 +4210,7 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
/// \param __d
/// A 64-bit integral value used to initialize bits [255:192] of the result.
/// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
{
return _mm256_set_epi64x(__d, __c, __b, __a);
@@ -4267,7 +4267,7 @@ _mm256_set1_ps(float __w)
/// A 32-bit integral value used to initialize each vector element of the
/// result.
/// \returns An initialized 256-bit integer vector of [8 x i32].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set1_epi32(int __i)
{
return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);
@@ -4285,7 +4285,7 @@ _mm256_set1_epi32(int __i)
/// A 16-bit integral value used to initialize each vector element of the
/// result.
/// \returns An initialized 256-bit integer vector of [16 x i16].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set1_epi16(short __w)
{
return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,
@@ -4303,7 +4303,7 @@ _mm256_set1_epi16(short __w)
/// An 8-bit integral value used to initialize each vector element of the
/// result.
/// \returns An initialized 256-bit integer vector of [32 x i8].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set1_epi8(char __b)
{
return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,
@@ -4324,7 +4324,7 @@ _mm256_set1_epi8(char __b)
/// A 64-bit integral value used to initialize each vector element of the
/// result.
/// \returns An initialized 256-bit integer vector of [4 x i64].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_set1_epi64x(long long __q)
{
return _mm256_set_epi64x(__q, __q, __q, __q);
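The AVX set/set1 family above gains the same treatment, so splat construction
is now visible to the constant evaluator. A sketch under the same assumptions
(C++ with -mavx):

    // __m256i is a vector of 4 x long long.
    constexpr __m256i V = _mm256_set1_epi64x(5);
    static_assert(V[0] == 5 && V[3] == 5, "all four 64-bit lanes hold 5");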
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 78e8a42..770bb5c 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2127,8 +2127,9 @@ _mm_add_epi32(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer.
/// \returns A 64-bit integer containing the sum of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
- return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_si64(__m64 __a,
+ __m64 __b) {
+ return (__m64)(((__v1du)__a)[0] + ((__v1du)__b)[0]);
}
/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
@@ -2557,8 +2558,9 @@ _mm_sub_epi32(__m128i __a, __m128i __b) {
/// A 64-bit integer vector containing the subtrahend.
/// \returns A 64-bit integer vector containing the difference of the values in
/// the operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
- return (__m64)((unsigned long long)__a - (unsigned long long)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_si64(__m64 __a,
+ __m64 __b) {
+ return (__m64)(((__v1du)__a)[0] - ((__v1du)__b)[0]);
}
/// Subtracts the corresponding elements of two [2 x i64] vectors.
@@ -2676,8 +2678,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a,
/// A 128-bit integer vector containing one of the source operands.
/// \returns A 128-bit integer vector containing the bitwise AND of the values
/// in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_and_si128(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a & (__v2du)__b);
}
@@ -2695,8 +2697,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
/// A 128-bit vector containing the right source operand.
/// \returns A 128-bit integer vector containing the bitwise AND of the one's
/// complement of the first operand and the values in the second operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_andnot_si128(__m128i __a, __m128i __b) {
return (__m128i)(~(__v2du)__a & (__v2du)__b);
}
/// Performs a bitwise OR of two 128-bit integer vectors.
@@ -2711,8 +2713,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
/// A 128-bit integer vector containing one of the source operands.
/// \returns A 128-bit integer vector containing the bitwise OR of the values
/// in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_or_si128(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a | (__v2du)__b);
}
@@ -2728,8 +2730,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
/// A 128-bit integer vector containing one of the source operands.
/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
/// values in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_xor_si128(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a ^ (__v2du)__b);
}
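The interesting rewrite in emmintrin.h is _mm_add_si64/_mm_sub_si64: the old
bodies cast the whole __m64 vector to a scalar integer, a reinterpretation the
constant evaluator rejects, while the new bodies read element 0 of a
one-element vector (__v1du), which it can fold. The 128-bit bitwise ops only
needed the attribute change. A sketch (same assumptions, with -msse2):

    constexpr __m128i a = {0x0F, 0xF0}; // __m128i: vector of 2 x long long
    constexpr __m128i b = {0x3C, 0x3C};
    constexpr __m128i c = _mm_and_si128(a, b);
    static_assert(c[0] == 0x0C && c[1] == 0x30, "per-lane AND folds");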
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index dc0fa5c..5a02a455 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -85,7 +85,7 @@ _mm_empty(void) {
/// A 32-bit integer value.
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
/// parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi32_si64(int __i)
{
return __extension__ (__m64)(__v2si){__i, 0};
@@ -102,7 +102,7 @@ _mm_cvtsi32_si64(int __i)
/// A 64-bit integer vector.
/// \returns A 32-bit signed integer value containing the lower 32 bits of the
/// parameter.
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi64_si32(__m64 __m)
{
return ((__v2si)__m)[0];
@@ -118,10 +118,10 @@ _mm_cvtsi64_si32(__m64 __m)
/// A 64-bit signed integer.
/// \returns A 64-bit integer vector containing the same bitwise pattern as the
/// parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi64_m64(long long __i)
{
- return (__m64)__i;
+ return __extension__ (__m64)(__v1di){__i};
}
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -134,10 +134,10 @@ _mm_cvtsi64_m64(long long __i)
/// A 64-bit integer vector.
/// \returns A 64-bit signed integer containing the same bitwise pattern as the
/// parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtm64_si64(__m64 __m)
{
- return (long long)__m;
+ return ((__v1di)__m)[0];
}
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -379,7 +379,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
@@ -400,7 +400,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
@@ -421,7 +421,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
@@ -536,7 +536,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
@@ -557,7 +557,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
@@ -578,7 +578,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
@@ -745,7 +745,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
@@ -1134,7 +1134,7 @@ _mm_srli_si64(__m64 __m, int __count)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_and_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
@@ -1155,7 +1155,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of the second
/// parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_andnot_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
@@ -1173,7 +1173,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_or_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
@@ -1191,7 +1191,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_xor_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
@@ -1213,7 +1213,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
@@ -1235,7 +1235,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
@@ -1257,7 +1257,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
@@ -1279,7 +1279,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
/* This function always performs a signed comparison, but __v8qi is a char
@@ -1303,7 +1303,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
@@ -1325,7 +1325,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)((__v2si)__m1 > (__v2si)__m2);
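_mm_cvtsi64_m64 and _mm_cvtm64_si64 get the same scalar-cast-to-element
rewrite, so a value can round-trip through __m64 entirely at compile time once
combined with the arithmetic intrinsics above. A sketch (assumes C++ and the
SSE2-backed MMX implementation these headers target):

    constexpr __m64 two = _mm_cvtsi64_m64(2);
    constexpr __m64 three = _mm_cvtsi64_m64(3);
    constexpr long long five = _mm_cvtm64_si64(_mm_add_si64(two, three));
    static_assert(five == 5, "conversion and addition fold in the evaluator");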
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 62fe3d1..4d58b4a 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -646,8 +646,17 @@ ExprResult CheckVarType(SemaOpenACC &S, OpenACCClauseKind CK, Expr *VarExpr,
if (auto *RefTy = InnerTy->getAs<ReferenceType>())
InnerTy = RefTy->getPointeeType();
- if (auto *ArrTy = InnerTy->getAsArrayTypeUnsafe())
+ if (auto *ArrTy = InnerTy->getAsArrayTypeUnsafe()) {
+    // Non-constant arrays decay to 'pointer', so warn and treat the check
+    // as successful.
+ if (!ArrTy->isConstantArrayType()) {
+ S.Diag(InnerLoc, clang::diag::warn_acc_var_referenced_non_const_array)
+ << InnerTy << CK;
+ return VarExpr;
+ }
+
return CheckVarType(S, CK, VarExpr, InnerLoc, ArrTy->getElementType());
+ }
auto *RD = InnerTy->getAsCXXRecordDecl();
@@ -2575,8 +2584,8 @@ SemaOpenACC::ActOnOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc) {
return BuildOpenACCAsteriskSizeExpr(AsteriskLoc);
}
-VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK,
- const Expr *VarExpr) {
+std::pair<VarDecl *, VarDecl *>
+SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK, const Expr *VarExpr) {
// Strip off any array subscripts/array section exprs to get to the type of
// the variable.
while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) {
@@ -2590,7 +2599,7 @@ VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK,
// fill in with nullptr. We'll count on TreeTransform to make this if
// necessary.
if (!VarExpr || VarExpr->getType()->isDependentType())
- return nullptr;
+ return {nullptr, nullptr};
QualType VarTy =
VarExpr->getType().getNonReferenceType().getUnqualifiedType();
@@ -2602,6 +2611,7 @@ VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK,
getASTContext().getTrivialTypeSourceInfo(VarTy), SC_Auto);
ExprResult Init;
+ VarDecl *Temporary = nullptr;
if (CK == OpenACCClauseKind::Private) {
// Trap errors so we don't get weird ones here. If we can't init, we'll just
@@ -2626,5 +2636,5 @@ VarDecl *SemaOpenACC::CreateInitRecipe(OpenACCClauseKind CK,
Recipe->setInitStyle(VarDecl::CallInit);
}
- return Recipe;
+ return {Recipe, Temporary};
}
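The array branch in CheckVarType now warns instead of recursing when the array
bound is not a constant, since such a variable decays to a pointer in the
clause. A hypothetical source that would hit the new
warn_acc_var_referenced_non_const_array diagnostic (the exact wording is
defined outside this hunk):

    void f(int n) {
      int vla[n];                       // non-constant array type
    #pragma acc parallel private(vla)   // warns: array decays to 'pointer'
      ;
    }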
diff --git a/clang/lib/Sema/SemaOpenACCClause.cpp b/clang/lib/Sema/SemaOpenACCClause.cpp
index 88d217f..e8a18243 100644
--- a/clang/lib/Sema/SemaOpenACCClause.cpp
+++ b/clang/lib/Sema/SemaOpenACCClause.cpp
@@ -800,7 +800,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitPrivateClause(
// Assemble the recipes list.
for (const Expr *VarExpr : Clause.getVarList())
InitRecipes.push_back(
- SemaRef.CreateInitRecipe(OpenACCClauseKind::Private, VarExpr));
+ SemaRef.CreateInitRecipe(OpenACCClauseKind::Private, VarExpr).first);
return OpenACCPrivateClause::Create(
Ctx, Clause.getBeginLoc(), Clause.getLParenLoc(), Clause.getVarList(),
@@ -813,7 +813,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitFirstPrivateClause(
// really isn't anything to do here. GCC does some duplicate-finding, though
// it isn't apparent in the standard where this is justified.
- llvm::SmallVector<VarDecl *> InitRecipes;
+ llvm::SmallVector<OpenACCFirstPrivateRecipe> InitRecipes;
// Assemble the recipes list.
for (const Expr *VarExpr : Clause.getVarList())
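Firstprivate recipes now travel as a two-field record rather than a bare
VarDecl. The declaration itself lives in OpenACCClause.h, but its shape can be
reconstructed from the uses later in this patch (RecipeDecl in TreeTransform.h,
RecipeDecl and InitFromTemporary in the serialization code); a sketch, not the
verbatim declaration:

    struct OpenACCFirstPrivateRecipe {
      VarDecl *RecipeDecl;        // synthesized variable holding the recipe
      VarDecl *InitFromTemporary; // temporary the firstprivate copy reads from
    };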
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 6ce5535..0030946 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -11901,8 +11901,11 @@ void OpenACCClauseTransform<Derived>::VisitPrivateClause(
if (InitRecipe)
InitRecipes.push_back(InitRecipe);
else
- InitRecipes.push_back(Self.getSema().OpenACC().CreateInitRecipe(
- OpenACCClauseKind::Private, VarRef.get()));
+ InitRecipes.push_back(
+ Self.getSema()
+ .OpenACC()
+ .CreateInitRecipe(OpenACCClauseKind::Private, VarRef.get())
+ .first);
}
}
ParsedClause.setVarListDetails(InstantiatedVarList,
@@ -11942,7 +11945,7 @@ template <typename Derived>
void OpenACCClauseTransform<Derived>::VisitFirstPrivateClause(
const OpenACCFirstPrivateClause &C) {
llvm::SmallVector<Expr *> InstantiatedVarList;
- llvm::SmallVector<VarDecl *> InitRecipes;
+ llvm::SmallVector<OpenACCFirstPrivateRecipe> InitRecipes;
for (const auto [RefExpr, InitRecipe] :
llvm::zip(C.getVarList(), C.getInitRecipes())) {
@@ -11953,7 +11956,7 @@ void OpenACCClauseTransform<Derived>::VisitFirstPrivateClause(
// We only have to create a new one if it is dependent, and Sema won't
// make one of these unless the type is non-dependent.
- if (InitRecipe)
+ if (InitRecipe.RecipeDecl)
InitRecipes.push_back(InitRecipe);
else
InitRecipes.push_back(Self.getSema().OpenACC().CreateInitRecipe(
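The transform reuses a recipe whenever Sema already built one and rebuilds it
otherwise; since Sema only builds recipes for non-dependent types, the rebuild
happens exactly for clauses whose variable type was dependent at parse time. A
hypothetical template illustrating that path:

    template <typename T>
    void g() {
      T x;                                 // type is dependent when parsed
    #pragma acc parallel firstprivate(x)   // recipe built per instantiation
      ;
    }
    void use() { g<float>(); }             // instantiation runs CreateInitRecipe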
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 1402f40..ed0ec9e 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -12877,9 +12877,12 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
case OpenACCClauseKind::FirstPrivate: {
SourceLocation LParenLoc = readSourceLocation();
llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
- llvm::SmallVector<VarDecl *> RecipeList;
- for (unsigned I = 0; I < VarList.size(); ++I)
- RecipeList.push_back(readDeclAs<VarDecl>());
+ llvm::SmallVector<OpenACCFirstPrivateRecipe> RecipeList;
+ for (unsigned I = 0; I < VarList.size(); ++I) {
+ VarDecl *Recipe = readDeclAs<VarDecl>();
+ VarDecl *RecipeTemp = readDeclAs<VarDecl>();
+ RecipeList.push_back({Recipe, RecipeTemp});
+ }
return OpenACCFirstPrivateClause::Create(getContext(), BeginLoc, LParenLoc,
VarList, RecipeList, EndLoc);
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c038d4d..c072acd 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -5461,11 +5461,6 @@ ASTWriter::WriteAST(llvm::PointerUnion<Sema *, Preprocessor *> Subject,
WritingAST = false;
- if (WritingModule && PPRef.getHeaderSearchInfo()
- .getHeaderSearchOpts()
- .ModulesValidateOncePerBuildSession)
- ModCache.updateModuleTimestamp(OutputFile);
-
if (ShouldCacheASTInMemory) {
// Construct MemoryBuffer and update buffer manager.
ModCache.getInMemoryModuleCache().addBuiltPCM(
@@ -8762,8 +8757,10 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
writeSourceLocation(FPC->getLParenLoc());
writeOpenACCVarList(FPC);
- for (VarDecl *VD : FPC->getInitRecipes())
- AddDeclRef(VD);
+ for (const OpenACCFirstPrivateRecipe &R : FPC->getInitRecipes()) {
+ AddDeclRef(R.RecipeDecl);
+ AddDeclRef(R.InitFromTemporary);
+ }
return;
}
case OpenACCClauseKind::Attach: {
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index fd0a398..0e5fc0a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -97,10 +97,6 @@ public:
CheckerFrontendWithBugType UninitializedRead{
"Accessing unitialized/garbage values"};
- // FIXME: This bug type should be removed because it is only emitted in a
- // situation that is practically impossible.
- const BugType AdditionOverflow{&OutOfBounds, "API"};
-
StringRef getDebugTag() const override { return "MallocChecker"; }
static void *getTag() { static int tag; return &tag; }
@@ -330,7 +326,6 @@ public:
const Stmt *S, StringRef WarningMsg) const;
void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
const Stmt *S, StringRef WarningMsg) const;
- void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
const Expr *E, const MemRegion *R,
StringRef Msg) const;
@@ -843,22 +838,6 @@ void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
}
}
-void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
- ProgramStateRef State) const {
- if (ExplodedNode *N = C.generateErrorNode(State)) {
- // This isn't a great error message, but this should never occur in real
- // code anyway -- you'd have to create a buffer longer than a size_t can
- // represent, which is sort of a contradiction.
- const char *WarningMsg =
- "This expression will create a string whose length is too big to "
- "be represented as a size_t";
-
- auto Report = std::make_unique<PathSensitiveBugReport>(AdditionOverflow,
- WarningMsg, N);
- C.emitReport(std::move(Report));
- }
-}
-
ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
ProgramStateRef state,
NonLoc left,
@@ -896,19 +875,22 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
*maxMinusRightNL, cmpTy);
- ProgramStateRef stateOverflow, stateOkay;
- std::tie(stateOverflow, stateOkay) =
- state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
+ auto [StateOverflow, StateOkay] =
+ state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
- if (stateOverflow && !stateOkay) {
- // We have an overflow. Emit a bug report.
- emitAdditionOverflowBug(C, stateOverflow);
+ if (StateOverflow && !StateOkay) {
+    // On this path the analyzer is convinced that the addition of these two
+    // values would overflow `size_t`. That can only be an artifact of our
+    // modeling, because this method is called in situations where the
+    // summands are size/length values, which are far below SIZE_MAX. To
+    // avoid false positives, just sink this invalid path.
+ C.addSink(StateOverflow);
return nullptr;
}
// From now on, assume an overflow didn't occur.
- assert(stateOkay);
- state = stateOkay;
+ assert(StateOkay);
+ state = StateOkay;
}
return state;
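The caller-visible contract of checkAdditionOverflow is unchanged (nullptr
means the path is dead); the difference is that the dead path is now sunk
silently instead of producing the old AdditionOverflow report. A hypothetical
analyzer input where that length arithmetic is modeled:

    #include <string.h>

    void concat(char *dst, const char *a, const char *b) {
      strcpy(dst, a);
      strcat(dst, b); // the checker sums strlen(dst) and strlen(b) here
    }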
diff --git a/clang/test/AST/ByteCode/cxx11.cpp b/clang/test/AST/ByteCode/cxx11.cpp
index bb8aca2..7aecf23b 100644
--- a/clang/test/AST/ByteCode/cxx11.cpp
+++ b/clang/test/AST/ByteCode/cxx11.cpp
@@ -318,3 +318,15 @@ namespace PR19010 {
};
void test() { constexpr Test t; }
}
+
+namespace ReadMutableInCopyCtor {
+ struct G {
+ struct X {};
+ union U { X a; };
+ mutable U u; // both-note {{declared here}}
+ };
+ constexpr G g1 = {};
+ constexpr G g2 = g1; // both-error {{must be initialized by a constant expression}} \
+ // both-note {{read of mutable member 'u'}} \
+ // both-note {{in call to 'G(g1)'}}
+}
diff --git a/clang/test/AST/ByteCode/functions.cpp b/clang/test/AST/ByteCode/functions.cpp
index 363b6a5..36e7bb3 100644
--- a/clang/test/AST/ByteCode/functions.cpp
+++ b/clang/test/AST/ByteCode/functions.cpp
@@ -622,7 +622,7 @@ namespace FromIntegral {
int a[(int)DoubleFn((void*)-1)()]; // both-error {{not allowed at file scope}} \
// both-warning {{variable length arrays}}
int b[(int)DoubleFn((void*)(-1 + 1))()]; // both-error {{not allowed at file scope}} \
- // expected-note {{evaluates to a null function pointer}} \
+ // both-note {{evaluates to a null function pointer}} \
// both-warning {{variable length arrays}}
#endif
}
diff --git a/clang/test/CIR/CodeGen/complex-arithmetic.cpp b/clang/test/CIR/CodeGen/complex-arithmetic.cpp
index 5e384cd..fbb0335 100644
--- a/clang/test/CIR/CodeGen/complex-arithmetic.cpp
+++ b/clang/test/CIR/CodeGen/complex-arithmetic.cpp
@@ -11,16 +11,16 @@ void foo() {
int _Complex c = a + b;
}
-// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
-// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
-// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
-// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
// CIR: %[[ADD:.*]] = cir.complex.add %[[TMP_A]], %[[TMP_B]] : !cir.complex<!s32i>
-// LLVM: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, i64 1, align 4
-// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4
-// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[COMPLEX_A]], align 4
-// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[COMPLEX_B]], align 4
+// LLVM: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
// LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0
// LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1
// LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0
@@ -30,16 +30,16 @@ void foo() {
// LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } poison, i32 %[[ADD_REAL]], 0
// LLVM: %[[RESULT_2:.*]] = insertvalue { i32, i32 } %[[RESULT]], i32 %[[ADD_IMAG]], 1
-// OGCG: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, align 4
-// OGCG: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4
// OGCG: %[[RESULT:.*]] = alloca { i32, i32 }, align 4
-// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 0
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0
// OGCG: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4
-// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 1
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 0
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
// OGCG: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4
-// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 1
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
// OGCG: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4
// OGCG: %[[ADD_REAL:.*]] = add i32 %[[A_REAL]], %[[B_REAL]]
// OGCG: %[[ADD_IMAG:.*]] = add i32 %[[A_IMAG]], %[[B_IMAG]]
@@ -54,16 +54,16 @@ void foo2() {
float _Complex c = a + b;
}
-// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
-// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
-// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
-// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
// CIR: %[[ADD:.*]] = cir.complex.add %[[TMP_A]], %[[TMP_B]] : !cir.complex<!cir.float>
-// LLVM: %[[COMPLEX_A:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[COMPLEX_B:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[COMPLEX_A]], align 4
-// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[COMPLEX_B]], align 4
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
@@ -73,16 +73,16 @@ void foo2() {
// LLVM: %[[RESULT:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL]], 0
// LLVM: %[[RESULT_2:.*]] = insertvalue { float, float } %[[RESULT]], float %[[ADD_IMAG]], 1
-// OGCG: %[[COMPLEX_A:.*]] = alloca { float, float }, align 4
-// OGCG: %[[COMPLEX_B:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4
-// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 0
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
-// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 1
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 0
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
-// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 1
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
// OGCG: %[[ADD_REAL:.*]] = fadd float %[[A_REAL]], %[[B_REAL]]
// OGCG: %[[ADD_IMAG:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]]
@@ -98,23 +98,23 @@ void foo3() {
float _Complex d = (a + b) + c;
}
-// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
-// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
-// CIR: %[[COMPLEX_C:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c"]
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c"]
// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["d", init]
-// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
-// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
// CIR: %[[ADD_A_B:.*]] = cir.complex.add %[[TMP_A]], %[[TMP_B]] : !cir.complex<!cir.float>
-// CIR: %[[TMP_C:.*]] = cir.load{{.*}} %[[COMPLEX_C]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_C:.*]] = cir.load{{.*}} %[[C_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
// CIR: %[[ADD_A_B_C:.*]] = cir.complex.add %[[ADD_A_B]], %[[TMP_C]] : !cir.complex<!cir.float>
// CIR: cir.store{{.*}} %[[ADD_A_B_C]], %[[RESULT]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
-// LLVM: %[[COMPLEX_A:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[COMPLEX_B:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[COMPLEX_C:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4
// LLVM: %[[RESULT:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[COMPLEX_A]], align 4
-// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[COMPLEX_B]], align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
@@ -123,7 +123,7 @@ void foo3() {
// LLVM: %[[ADD_IMAG_A_B:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]]
// LLVM: %[[A_B:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL_A_B]], 0
// LLVM: %[[TMP_A_B:.*]] = insertvalue { float, float } %[[A_B]], float %[[ADD_IMAG_A_B]], 1
-// LLVM: %[[TMP_C:.*]] = load { float, float }, ptr %[[COMPLEX_C]], align 4
+// LLVM: %[[TMP_C:.*]] = load { float, float }, ptr %[[C_ADDR]], align 4
// LLVM: %[[A_B_REAL:.*]] = extractvalue { float, float } %[[TMP_A_B]], 0
// LLVM: %[[A_B_IMAG:.*]] = extractvalue { float, float } %[[TMP_A_B]], 1
// LLVM: %[[C_REAL:.*]] = extractvalue { float, float } %[[TMP_C]], 0
@@ -134,23 +134,23 @@ void foo3() {
// LLVM: %[[TMP_A_B_C:.*]] = insertvalue { float, float } %[[A_B_C]], float %[[ADD_IMAG_A_B_C]], 1
// LLVM: store { float, float } %[[TMP_A_B_C]], ptr %[[RESULT]], align 4
-// OGCG: %[[COMPLEX_A:.*]] = alloca { float, float }, align 4
-// OGCG: %[[COMPLEX_B:.*]] = alloca { float, float }, align 4
-// OGCG: %[[COMPLEX_C:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[C_ADDR:.*]] = alloca { float, float }, align 4
// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4
-// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 0
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
-// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 1
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 0
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
-// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 1
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
// OGCG: %[[ADD_REAL_A_B:.*]] = fadd float %[[A_REAL]], %[[B_REAL]]
// OGCG: %[[ADD_IMAG_A_B:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]]
-// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_C]], i32 0, i32 0
+// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0
// OGCG: %[[C_REAL:.*]] = load float, ptr %[[C_REAL_PTR]], align 4
-// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_C]], i32 0, i32 1
+// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1
// OGCG: %[[C_IMAG:.*]] = load float, ptr %[[C_IMAG_PTR]], align 4
// OGCG: %[[ADD_REAL_A_B_C:.*]] = fadd float %[[ADD_REAL_A_B]], %[[C_REAL]]
// OGCG: %[[ADD_IMAG_A_B_C:.*]] = fadd float %[[ADD_IMAG_A_B]], %[[C_IMAG]]
@@ -165,17 +165,17 @@ void foo4() {
int _Complex c = a - b;
}
-// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
-// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
-// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
-// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
// CIR: %[[SUB:.*]] = cir.complex.sub %[[TMP_A]], %[[TMP_B]] : !cir.complex<!s32i>
-// LLVM: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, i64 1, align 4
-// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4
-// LLVM: %[[COMPLEX_C:.*]] = alloca { i32, i32 }, i64 1, align 4
-// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[COMPLEX_A]], align 4
-// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[COMPLEX_B]], align 4
+// LLVM: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
// LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0
// LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1
// LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0
@@ -184,18 +184,18 @@ void foo4() {
// LLVM: %[[SUB_IMAG:.*]] = sub i32 %[[A_IMAG]], %[[B_IMAG]]
// LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } poison, i32 %[[SUB_REAL]], 0
// LLVM: %[[RESULT_2:.*]] = insertvalue { i32, i32 } %[[RESULT]], i32 %[[SUB_IMAG]], 1
-// LLVM: store { i32, i32 } %[[RESULT_2]], ptr %[[COMPLEX_C]], align 4
+// LLVM: store { i32, i32 } %[[RESULT_2]], ptr %[[C_ADDR]], align 4
-// OGCG: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, align 4
-// OGCG: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4
// OGCG: %[[RESULT:.*]] = alloca { i32, i32 }, align 4
-// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 0
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0
// OGCG: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4
-// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_A]], i32 0, i32 1
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 0
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
// OGCG: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4
-// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[COMPLEX_B]], i32 0, i32 1
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
// OGCG: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4
// OGCG: %[[SUB_REAL:.*]] = sub i32 %[[A_REAL]], %[[B_REAL]]
// OGCG: %[[SUB_IMAG:.*]] = sub i32 %[[A_IMAG]], %[[B_IMAG]]
@@ -210,16 +210,16 @@ void foo5() {
float _Complex c = a - b;
}
-// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
-// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
-// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
-// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
// CIR: %[[SUB:.*]] = cir.complex.sub %[[TMP_A]], %[[TMP_B]] : !cir.complex<!cir.float>
-// LLVM: %[[COMPLEX_A:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[COMPLEX_B:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[COMPLEX_A]], align 4
-// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[COMPLEX_B]], align 4
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
@@ -229,16 +229,16 @@ void foo5() {
// LLVM: %[[RESULT:.*]] = insertvalue { float, float } poison, float %[[SUB_REAL]], 0
// LLVM: %[[RESULT_2:.*]] = insertvalue { float, float } %[[RESULT]], float %[[SUB_IMAG]], 1
-// OGCG: %[[COMPLEX_A:.*]] = alloca { float, float }, align 4
-// OGCG: %[[COMPLEX_B:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4
-// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 0
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
-// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 1
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 0
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
-// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 1
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
// OGCG: %[[SUB_REAL:.*]] = fsub float %[[A_REAL]], %[[B_REAL]]
// OGCG: %[[SUB_IMAG:.*]] = fsub float %[[A_IMAG]], %[[B_IMAG]]
@@ -254,23 +254,23 @@ void foo6() {
float _Complex d = (a - b) - c;
}
-// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
-// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
-// CIR: %[[COMPLEX_C:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c"]
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c"]
// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["d", init]
-// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
-// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
// CIR: %[[SUB_A_B:.*]] = cir.complex.sub %[[TMP_A]], %[[TMP_B]] : !cir.complex<!cir.float>
-// CIR: %[[TMP_C:.*]] = cir.load{{.*}} %[[COMPLEX_C]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_C:.*]] = cir.load{{.*}} %[[C_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
// CIR: %[[SUB_A_B_C:.*]] = cir.complex.sub %[[SUB_A_B]], %[[TMP_C]] : !cir.complex<!cir.float>
// CIR: cir.store{{.*}} %[[SUB_A_B_C]], %[[RESULT]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
-// LLVM: %[[COMPLEX_A:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[COMPLEX_B:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[COMPLEX_C:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4
// LLVM: %[[RESULT:.*]] = alloca { float, float }, i64 1, align 4
-// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[COMPLEX_A]], align 4
-// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[COMPLEX_B]], align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
@@ -279,7 +279,7 @@ void foo6() {
// LLVM: %[[SUB_IMAG_A_B:.*]] = fsub float %[[A_IMAG]], %[[B_IMAG]]
// LLVM: %[[A_B:.*]] = insertvalue { float, float } poison, float %[[SUB_REAL_A_B]], 0
// LLVM: %[[TMP_A_B:.*]] = insertvalue { float, float } %[[A_B]], float %[[SUB_IMAG_A_B]], 1
-// LLVM: %[[TMP_C:.*]] = load { float, float }, ptr %[[COMPLEX_C]], align 4
+// LLVM: %[[TMP_C:.*]] = load { float, float }, ptr %[[C_ADDR]], align 4
// LLVM: %[[A_B_REAL:.*]] = extractvalue { float, float } %[[TMP_A_B]], 0
// LLVM: %[[A_B_IMAG:.*]] = extractvalue { float, float } %[[TMP_A_B]], 1
// LLVM: %[[C_REAL:.*]] = extractvalue { float, float } %[[TMP_C]], 0
@@ -290,23 +290,23 @@ void foo6() {
// LLVM: %[[TMP_A_B_C:.*]] = insertvalue { float, float } %[[A_B_C]], float %[[SUB_IMAG_A_B_C]], 1
// LLVM: store { float, float } %[[TMP_A_B_C]], ptr %[[RESULT]], align 4
-// OGCG: %[[COMPLEX_A:.*]] = alloca { float, float }, align 4
-// OGCG: %[[COMPLEX_B:.*]] = alloca { float, float }, align 4
-// OGCG: %[[COMPLEX_C:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[C_ADDR:.*]] = alloca { float, float }, align 4
// OGCG: %[[RESULT:.*]] = alloca { float, float }, align 4
-// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 0
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
-// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_A]], i32 0, i32 1
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
-// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 0
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
-// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_B]], i32 0, i32 1
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
// OGCG: %[[SUB_REAL_A_B:.*]] = fsub float %[[A_REAL]], %[[B_REAL]]
// OGCG: %[[SUB_IMAG_A_B:.*]] = fsub float %[[A_IMAG]], %[[B_IMAG]]
-// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_C]], i32 0, i32 0
+// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0
// OGCG: %[[C_REAL:.*]] = load float, ptr %[[C_REAL_PTR]], align 4
-// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_C]], i32 0, i32 1
+// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1
// OGCG: %[[C_IMAG:.*]] = load float, ptr %[[C_IMAG_PTR]], align 4
// OGCG: %[[SUB_REAL_A_B_C:.*]] = fsub float %[[SUB_REAL_A_B]], %[[C_REAL]]
// OGCG: %[[SUB_IMAG_A_B_C:.*]] = fsub float %[[SUB_IMAG_A_B]], %[[C_IMAG]]
@@ -314,3 +314,4 @@ void foo6() {
// OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[RESULT]], i32 0, i32 1
// OGCG: store float %[[SUB_REAL_A_B_C]], ptr %[[RESULT_REAL_PTR]], align 4
// OGCG: store float %[[SUB_IMAG_A_B_C]], ptr %[[RESULT_IMAG_PTR]], align 4
+
diff --git a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
new file mode 100644
index 0000000..35a8aa6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
@@ -0,0 +1,288 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR,CXX_CIR
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM,CXX_LLVM
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG,CXX_OGCG
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void foo() {
+ float _Complex a;
+ float _Complex b;
+ b += a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[RESULT:.*]] = cir.complex.add %[[TMP_B]], %[[TMP_A]] : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
+// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1
+// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM: %[[ADD_REAL_A_B:.*]] = fadd float %[[B_REAL]], %[[A_REAL]]
+// LLVM: %[[ADD_IMAG_A_B:.*]] = fadd float %[[B_IMAG]], %[[A_IMAG]]
+// LLVM: %[[ADD_A_B:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL_A_B]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { float, float } %[[ADD_A_B]], float %[[ADD_IMAG_A_B]], 1
+// LLVM: store { float, float } %[[RESULT]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
+// OGCG: %[[ADD_REAL:.*]] = fadd float %[[B_REAL]], %[[A_REAL]]
+// OGCG: %[[ADD_IMAG:.*]] = fadd float %[[B_IMAG]], %[[A_IMAG]]
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store float %[[ADD_REAL]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store float %[[ADD_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+
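+// For reference, the complex += checked above has the same semantics as this
+// scalar sketch (illustration only; not compiled or checked by FileCheck):
+//
+//   float b_real = __real__ b + __real__ a;
+//   float b_imag = __imag__ b + __imag__ a;
+//   __real__ b = b_real;
+//   __imag__ b = b_imag;
+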
+void foo1() {
+ float _Complex a;
+ float _Complex b;
+ b -= a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[RESULT:.*]] = cir.complex.sub %[[TMP_B]], %[[TMP_A]] : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
+// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1
+// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM: %[[SUB_REAL_A_B:.*]] = fsub float %[[B_REAL]], %[[A_REAL]]
+// LLVM: %[[SUB_IMAG_A_B:.*]] = fsub float %[[B_IMAG]], %[[A_IMAG]]
+// LLVM: %[[SUB_A_B:.*]] = insertvalue { float, float } poison, float %[[SUB_REAL_A_B]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { float, float } %[[SUB_A_B]], float %[[SUB_IMAG_A_B]], 1
+// LLVM: store { float, float } %[[RESULT]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
+// OGCG: %[[SUB_REAL:.*]] = fsub float %[[B_REAL]], %[[A_REAL]]
+// OGCG: %[[SUB_IMAG:.*]] = fsub float %[[B_IMAG]], %[[A_IMAG]]
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store float %[[SUB_REAL]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store float %[[SUB_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+
+void foo2() {
+ int _Complex a;
+ int _Complex b;
+ b += a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[RESULT:.*]] = cir.complex.add %[[TMP_B]], %[[TMP_A]] : !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
+// LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1
+// LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1
+// LLVM: %[[ADD_REAL_A_B:.*]] = add i32 %[[B_REAL]], %[[A_REAL]]
+// LLVM: %[[ADD_IMAG_A_B:.*]] = add i32 %[[B_IMAG]], %[[A_IMAG]]
+// LLVM: %[[ADD_A_B:.*]] = insertvalue { i32, i32 } poison, i32 %[[ADD_REAL_A_B]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } %[[ADD_A_B]], i32 %[[ADD_IMAG_A_B]], 1
+// LLVM: store { i32, i32 } %[[RESULT]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4
+// OGCG: %[[ADD_REAL:.*]] = add i32 %[[B_REAL]], %[[A_REAL]]
+// OGCG: %[[ADD_IMAG:.*]] = add i32 %[[B_IMAG]], %[[A_IMAG]]
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store i32 %[[ADD_REAL]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store i32 %[[ADD_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+
+void foo3() {
+ _Float16 _Complex a;
+ _Float16 _Complex b;
+ b += a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[B_REAL_F32:.*]] = cir.cast(floating, %[[B_REAL]] : !cir.f16), !cir.float
+// CIR: %[[B_IMAG_F32:.*]] = cir.cast(floating, %[[B_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[B_COMPLEX_F32:.*]] = cir.complex.create %[[B_REAL_F32]], %[[B_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[ADD_A_B:.*]] = cir.complex.add %[[B_COMPLEX_F32]], %[[A_COMPLEX_F32]] : !cir.complex<!cir.float>
+// CIR: %[[ADD_REAL:.*]] = cir.complex.real %[[ADD_A_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[ADD_IMAG:.*]] = cir.complex.imag %[[ADD_A_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[ADD_REAL_F16:.*]] = cir.cast(floating, %[[ADD_REAL]] : !cir.float), !cir.f16
+// CIR: %[[ADD_IMAG_F16:.*]] = cir.cast(floating, %[[ADD_IMAG]] : !cir.float), !cir.f16
+// CIR: %[[RESULT:.*]] = cir.complex.create %[[ADD_REAL_F16]], %[[ADD_IMAG_F16]] : !cir.f16 -> !cir.complex<!cir.f16>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
+// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
+// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
+// LLVM: %[[TMP_B:.*]] = load { half, half }, ptr %[[B_ADDR]], align 2
+// LLVM: %[[B_REAL:.*]] = extractvalue { half, half } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { half, half } %[[TMP_B]], 1
+// LLVM: %[[B_REAL_F32:.*]] = fpext half %[[B_REAL]] to float
+// LLVM: %[[B_IMAG_F32:.*]] = fpext half %[[B_IMAG]] to float
+// LLVM: %[[TMP_B_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[B_REAL_F32]], 0
+// LLVM: %[[B_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_B_COMPLEX_F32]], float %[[B_IMAG_F32]], 1
+// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[B_COMPLEX_F32]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[B_COMPLEX_F32]], 1
+// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[A_COMPLEX_F32]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[A_COMPLEX_F32]], 1
+// LLVM: %[[ADD_REAL:.*]] = fadd float %[[B_REAL]], %[[A_REAL]]
+// LLVM: %[[ADD_IMAG:.*]] = fadd float %[[B_IMAG]], %[[A_IMAG]]
+// LLVM: %[[TMP_RESULT:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { float, float } %[[TMP_RESULT]], float %[[ADD_IMAG]], 1
+// LLVM: %[[RESULT_REAL:.*]] = extractvalue { float, float } %[[RESULT]], 0
+// LLVM: %[[RESULT_IMAG:.*]] = extractvalue { float, float } %[[RESULT]], 1
+// LLVM: %[[RESULT_REAL_F16:.*]] = fptrunc float %[[RESULT_REAL]] to half
+// LLVM: %[[RESULT_IMAG_F16:.*]] = fptrunc float %[[RESULT_IMAG]] to half
+// LLVM: %[[TMP_RESULT_F16:.*]] = insertvalue { half, half } undef, half %[[RESULT_REAL_F16]], 0
+// LLVM: %[[RESULT_F16:.*]] = insertvalue { half, half } %[[TMP_RESULT_F16]], half %[[RESULT_IMAG_F16]], 1
+// LLVM: store { half, half } %[[RESULT_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca { half, half }, align 2
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load half, ptr %[[A_REAL_PTR]], align 2
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2
+// OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_REAL:.*]] = load half, ptr %[[B_REAL_PTR]], align 2
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: %[[B_IMAG:.*]] = load half, ptr %[[B_IMAG_PTR]], align 2
+// OGCG: %[[B_REAL_F32:.*]] = fpext half %[[B_REAL]] to float
+// OGCG: %[[B_IMAG_F32:.*]] = fpext half %[[B_IMAG]] to float
+// OGCG: %[[ADD_REAL:.*]] = fadd float %[[B_REAL_F32]], %[[A_REAL_F32]]
+// OGCG: %[[ADD_IMAG:.*]] = fadd float %[[B_IMAG_F32]], %[[A_IMAG_F32]]
+// OGCG: %[[ADD_REAL_F16:.*]] = fptrunc float %[[ADD_REAL]] to half
+// OGCG: %[[ADD_IMAG_F16:.*]] = fptrunc float %[[ADD_IMAG]] to half
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store half %[[ADD_REAL_F16]], ptr %[[B_REAL_PTR]], align 2
+// OGCG: store half %[[ADD_IMAG_F16]], ptr %[[B_IMAG_PTR]], align 2
+
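+// The _Float16 checks above follow the usual arithmetic promotion: both
+// halves are widened to float, added, then truncated back to half on the
+// store. A scalar sketch of the same semantics (illustration only):
+//
+//   __real__ b = (_Float16)((float)__real__ b + (float)__real__ a);
+//   __imag__ b = (_Float16)((float)__imag__ b + (float)__imag__ a);
+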
+#ifdef __cplusplus
+void foo4() {
+ volatile _Complex int a;
+ volatile _Complex int b;
+ int _Complex c = b += a;
+}
+#endif
+
+// CXX_CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CXX_CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
+// CXX_CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CXX_CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CXX_CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CXX_CIR: %[[RESULT:.*]] = cir.complex.add %[[TMP_B]], %[[TMP_A]] : !cir.complex<!s32i>
+// CXX_CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+// CXX_CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CXX_CIR: cir.store{{.*}} %[[TMP_B]], %[[C_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// CXX_LLVM: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// CXX_LLVM: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// CXX_LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// CXX_LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4
+// CXX_LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
+// CXX_LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0
+// CXX_LLVM: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1
+// CXX_LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0
+// CXX_LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1
+// CXX_LLVM: %[[ADD_REAL:.*]] = add i32 %[[B_REAL]], %[[A_REAL]]
+// CXX_LLVM: %[[ADD_IMAG:.*]] = add i32 %[[B_IMAG]], %[[A_IMAG]]
+// CXX_LLVM: %[[TMP_RESULT:.*]] = insertvalue { i32, i32 } poison, i32 %[[ADD_REAL]], 0
+// CXX_LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } %[[TMP_RESULT]], i32 %[[ADD_IMAG]], 1
+// CXX_LLVM: store { i32, i32 } %[[RESULT]], ptr %[[B_ADDR]], align 4
+// CXX_LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
+// CXX_LLVM: store { i32, i32 } %[[TMP_B]], ptr %[[C_ADDR]], align 4
+
+// CXX_OGCG: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4
+// CXX_OGCG: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4
+// CXX_OGCG: %[[C_ADDR:.*]] = alloca { i32, i32 }, align 4
+// CXX_OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0
+// CXX_OGCG: %[[A_REAL:.*]] = load volatile i32, ptr %[[A_REAL_PTR]], align 4
+// CXX_OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1
+// CXX_OGCG: %[[A_IMAG:.*]] = load volatile i32, ptr %[[A_IMAG_PTR]], align 4
+// CXX_OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// CXX_OGCG: %[[B_REAL:.*]] = load volatile i32, ptr %[[B_REAL_PTR]], align 4
+// CXX_OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// CXX_OGCG: %[[B_IMAG:.*]] = load volatile i32, ptr %[[B_IMAG_PTR]], align 4
+// CXX_OGCG: %[[ADD_REAL:.*]] = add i32 %[[B_REAL]], %[[A_REAL]]
+// CXX_OGCG: %[[ADD_IMAG:.*]] = add i32 %[[B_IMAG]], %[[A_IMAG]]
+// CXX_OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// CXX_OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// CXX_OGCG: store volatile i32 %[[ADD_REAL]], ptr %[[B_REAL_PTR]], align 4
+// CXX_OGCG: store volatile i32 %[[ADD_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+// CXX_OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// CXX_OGCG: %[[B_REAL:.*]] = load volatile i32, ptr %[[B_REAL_PTR]], align 4
+// CXX_OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// CXX_OGCG: %[[B_IMAG:.*]] = load volatile i32, ptr %[[B_IMAG_PTR]], align 4
+// CXX_OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 0
+// CXX_OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1
+// CXX_OGCG: store i32 %[[B_REAL]], ptr %[[C_REAL_PTR]], align 4
+// CXX_OGCG: store i32 %[[B_IMAG]], ptr %[[C_IMAG_PTR]], align 4
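+// In C++, the value of a volatile compound assignment is re-read from the
+// lvalue, which is why the CXX_OGCG lines end with a second pair of volatile
+// loads from b before the plain stores into c. Roughly (sketch only):
+//
+//   b += a;             // volatile read-modify-write of b
+//   int _Complex c = b; // separate volatile re-read of b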
diff --git a/clang/test/CIR/CodeGen/variable-template-specialization.cpp b/clang/test/CIR/CodeGen/variable-template-specialization.cpp
new file mode 100644
index 0000000..c13ab51
--- /dev/null
+++ b/clang/test/CIR/CodeGen/variable-template-specialization.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -std=c++14 -triple nvptx64-unknown-unknown -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++14 -triple nvptx64-unknown-unknown -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++14 -triple nvptx64-unknown-unknown -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct some_struct {
+ int x;
+};
+
+template<int I>
+int var_template;
+
+template<> int var_template<0>;
+template<> int var_template<1> = 1;
+template<> some_struct var_template<2>;
+
+// CIR: !rec_some_struct = !cir.record<struct "some_struct" {!s32i}>
+// CIR: cir.global external @_Z12var_templateILi0EE = #cir.int<0> : !s32i
+// CIR: cir.global external @_Z12var_templateILi1EE = #cir.int<1> : !s32i
+// CIR: cir.global external @_Z12var_templateILi2EE = #cir.zero : !rec_some_struct
+
+// LLVM: %[[STRUCT_TYPE:.+]] = type { i32 }
+// LLVM: @_Z12var_templateILi0EE = global i32 0
+// LLVM: @_Z12var_templateILi1EE = global i32 1
+// LLVM: @_Z12var_templateILi2EE = global %[[STRUCT_TYPE]] zeroinitializer
+
+// OGCG: %[[STRUCT_TYPE:.+]] = type { i32 }
+// OGCG: @_Z12var_templateILi0EE = global i32 0
+// OGCG: @_Z12var_templateILi1EE = global i32 1
+// OGCG: @_Z12var_templateILi2EE = global %[[STRUCT_TYPE]] zeroinitializer
+
+template<typename T, int I> int partial_var_template_specialization_shouldnt_hit_codegen;
+template<typename T> int partial_var_template_specialization_shouldnt_hit_codegen<T, 123>;
+template<int I> float partial_var_template_specialization_shouldnt_hit_codegen<float, I>;
+
+// CIR-NOT: partial_var_template_specialization_shouldnt_hit_codegen
+// LLVM-NOT: partial_var_template_specialization_shouldnt_hit_codegen
+// OGCG-NOT: partial_var_template_specialization_shouldnt_hit_codegen
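+// Only the explicit specializations above are entities that reach codegen; a
+// partial specialization is just a pattern. Odr-using one, as in the sketch
+// below (deliberately left in a comment so the -NOT checks stay meaningful),
+// would trigger an implicit instantiation and emit a symbol:
+//
+//   int use() { return partial_var_template_specialization_shouldnt_hit_codegen<long, 123>; }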
diff --git a/clang/test/CodeGen/PowerPC/check-zero-vector.c b/clang/test/CodeGen/PowerPC/check-zero-vector.c
deleted file mode 100644
index cb6c826..0000000
--- a/clang/test/CodeGen/PowerPC/check-zero-vector.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE
-// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32
-
-// POWERPC_64-LABEL: define signext i32 @test_Greater_than(
-// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_64-NEXT: [[ENTRY:.*:]]
-// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
-// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_64-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_64: [[FOR_COND]]:
-// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_64: [[FOR_BODY]]:
-// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
-// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
-// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_64: [[IF_THEN]]:
-// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: br label %[[IF_END]]
-// POWERPC_64: [[IF_END]]:
-// POWERPC_64-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_64: [[FOR_INC]]:
-// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_64: [[FOR_END]]:
-// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_64-NEXT: ret i32 [[CONV4]]
-//
-// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than(
-// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_64LE-NEXT: [[ENTRY:.*:]]
-// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
-// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_64LE: [[FOR_COND]]:
-// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_64LE: [[FOR_BODY]]:
-// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
-// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
-// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_64LE: [[IF_THEN]]:
-// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: br label %[[IF_END]]
-// POWERPC_64LE: [[IF_END]]:
-// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_64LE: [[FOR_INC]]:
-// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_64LE: [[FOR_END]]:
-// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_64LE-NEXT: ret i32 [[CONV4]]
-//
-// POWERPC_32-LABEL: define i32 @test_Greater_than(
-// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_32-NEXT: [[ENTRY:.*:]]
-// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4
-// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4
-// POWERPC_32-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_32-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_32: [[FOR_COND]]:
-// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_32: [[FOR_BODY]]:
-// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4
-// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]]
-// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_32: [[IF_THEN]]:
-// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: br label %[[IF_END]]
-// POWERPC_32: [[IF_END]]:
-// POWERPC_32-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_32: [[FOR_INC]]:
-// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_32: [[FOR_END]]:
-// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_32-NEXT: ret i32 [[CONV4]]
-//
-int test_Greater_than(unsigned short *colauths) {
- unsigned short result = 0;
- for (int i = 0; i < 4; i++) {
- if (colauths[i] > 0) {
- result++;
- }
- }
- return result;
-}
-//.
-// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
-// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
-// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index ed39862..2d43764 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -44,12 +44,14 @@ __m256d test_mm256_and_pd(__m256d A, __m256d B) {
// CHECK: and <4 x i64>
return _mm256_and_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_and_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -0.0, -0.0, +0.0, +7.0));
__m256 test_mm256_and_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_and_ps
// CHECK: and <8 x i32>
return _mm256_and_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_and_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f));
__m256d test_mm256_andnot_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_andnot_pd
@@ -57,6 +59,7 @@ __m256d test_mm256_andnot_pd(__m256d A, __m256d B) {
// CHECK: and <4 x i64>
return _mm256_andnot_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_andnot_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), +0.0, +0.0, +0.0, +0.0));
__m256 test_mm256_andnot_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_andnot_ps
@@ -64,6 +67,7 @@ __m256 test_mm256_andnot_ps(__m256 A, __m256 B) {
// CHECK: and <8 x i32>
return _mm256_andnot_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_andnot_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_blend_pd
@@ -1258,18 +1262,21 @@ __m256d test_mm256_movedup_pd(__m256d A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
return _mm256_movedup_pd(A);
}
+TEST_CONSTEXPR(match_m256d(_mm256_movedup_pd((__m256d){+7.0, -7.0, -42.0, +42.0}), +7.0, +7.0, -42.0, -42.0));
__m256 test_mm256_movehdup_ps(__m256 A) {
// CHECK-LABEL: test_mm256_movehdup_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
return _mm256_movehdup_ps(A);
}
+TEST_CONSTEXPR(match_m256(_mm256_movehdup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), -1.0f, -1.0f, +4.0f, +4.0f, -8.0f, -8.0f, +3.0f, +3.0f));
__m256 test_mm256_moveldup_ps(__m256 A) {
// CHECK-LABEL: test_mm256_moveldup_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm256_moveldup_ps(A);
}
+TEST_CONSTEXPR(match_m256(_mm256_moveldup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +8.0f, +8.0f, -3.0f, -3.0f));
int test_mm256_movemask_pd(__m256d A) {
// CHECK-LABEL: test_mm256_movemask_pd
@@ -1300,12 +1307,14 @@ __m256d test_mm256_or_pd(__m256d A, __m256d B) {
// CHECK: or <4 x i64>
return _mm256_or_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_or_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -4.0, -5.0, -6.0, +7.0));
__m256 test_mm256_or_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_or_ps
// CHECK: or <8 x i32>
return _mm256_or_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_or_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f));
__m128d test_mm_permute_pd(__m128d A) {
// CHECK-LABEL: test_mm_permute_pd
@@ -1443,6 +1452,7 @@ __m256i test_mm256_set_epi8(char A0, char A1, char A2, char A3, char A4, char A5
// CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31
return _mm256_set_epi8(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
__m256i test_mm256_set_epi16(short A0, short A1, short A2, short A3, short A4, short A5, short A6, short A7,
short A8, short A9, short A10, short A11, short A12, short A13, short A14, short A15) {
@@ -1465,6 +1475,7 @@ __m256i test_mm256_set_epi16(short A0, short A1, short A2, short A3, short A4, s
// CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15
return _mm256_set_epi16(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_set_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15), -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0));
__m256i test_mm256_set_epi32(int A0, int A1, int A2, int A3, int A4, int A5, int A6, int A7) {
// CHECK-LABEL: test_mm256_set_epi32
@@ -1478,6 +1489,7 @@ __m256i test_mm256_set_epi32(int A0, int A1, int A2, int A3, int A4, int A5, int
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7
return _mm256_set_epi32(A0, A1, A2, A3, A4, A5, A6, A7);
}
+TEST_CONSTEXPR(match_v8si(_mm256_set_epi32(1, -3, 5, -7, 9, -11, 13, -15), -15, 13, -11, 9, -7, 5, -3, 1));
__m256i test_mm256_set_epi64x(long long A0, long long A1, long long A2, long long A3) {
// CHECK-LABEL: test_mm256_set_epi64x
@@ -1487,6 +1499,7 @@ __m256i test_mm256_set_epi64x(long long A0, long long A1, long long A2, long lon
// CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3
return _mm256_set_epi64x(A0, A1, A2, A3);
}
+TEST_CONSTEXPR(match_v4di(_mm256_set_epi64x(100, -1000, 2000, -200), -200, 2000, -1000, 100));
__m256 test_mm256_set_m128(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm256_set_m128
@@ -1566,6 +1579,7 @@ __m256i test_mm256_set1_epi8(char A) {
// CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31
return _mm256_set1_epi8(A);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99));
__m256i test_mm256_set1_epi16(short A) {
// CHECK-LABEL: test_mm256_set1_epi16
@@ -1587,6 +1601,7 @@ __m256i test_mm256_set1_epi16(short A) {
// CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15
return _mm256_set1_epi16(A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128));
__m256i test_mm256_set1_epi32(int A) {
// CHECK-LABEL: test_mm256_set1_epi32
@@ -1600,6 +1615,7 @@ __m256i test_mm256_set1_epi32(int A) {
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7
return _mm256_set1_epi32(A);
}
+TEST_CONSTEXPR(match_v8si(_mm256_set1_epi32(55), 55, 55, 55, 55, 55, 55, 55, 55));
__m256i test_mm256_set1_epi64x(long long A) {
// CHECK-LABEL: test_mm256_set1_epi64x
@@ -1609,6 +1625,7 @@ __m256i test_mm256_set1_epi64x(long long A) {
// CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3
return _mm256_set1_epi64x(A);
}
+TEST_CONSTEXPR(match_v4di(_mm256_set1_epi64x(-65535), -65535, -65535, -65535, -65535));
__m256d test_mm256_set1_pd(double A) {
// CHECK-LABEL: test_mm256_set1_pd
@@ -1673,6 +1690,7 @@ __m256i test_mm256_setr_epi8(char A0, char A1, char A2, char A3, char A4, char A
// CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 31
return _mm256_setr_epi8(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
__m256i test_mm256_setr_epi16(short A0, short A1, short A2, short A3, short A4, short A5, short A6, short A7,
short A8, short A9, short A10, short A11, short A12, short A13, short A14, short A15) {
@@ -1695,6 +1713,7 @@ __m256i test_mm256_setr_epi16(short A0, short A1, short A2, short A3, short A4,
// CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 15
return _mm256_setr_epi16(A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15), 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15));
__m256i test_mm256_setr_epi32(int A0, int A1, int A2, int A3, int A4, int A5, int A6, int A7) {
// CHECK-LABEL: test_mm256_setr_epi32
@@ -1708,6 +1727,7 @@ __m256i test_mm256_setr_epi32(int A0, int A1, int A2, int A3, int A4, int A5, in
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7
return _mm256_setr_epi32(A0, A1, A2, A3, A4, A5, A6, A7);
}
+TEST_CONSTEXPR(match_v8si(_mm256_setr_epi32(1, -3, 5, -7, 9, -11, 13, -15), 1, -3, 5, -7, 9, -11, 13, -15));
__m256i test_mm256_setr_epi64x(long long A0, long long A1, long long A2, long long A3) {
// CHECK-LABEL: test_mm256_setr_epi64x
@@ -1717,6 +1737,7 @@ __m256i test_mm256_setr_epi64x(long long A0, long long A1, long long A2, long lo
// CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3
return _mm256_setr_epi64x(A0, A1, A2, A3);
}
+TEST_CONSTEXPR(match_v4di(_mm256_setr_epi64x(100, -1000, 2000, -200), 100, -1000, 2000, -200));
__m256 test_mm256_setr_m128(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm256_setr_m128
@@ -2065,12 +2086,14 @@ __m256d test_mm256_xor_pd(__m256d A, __m256d B) {
// CHECK: xor <4 x i64>
return _mm256_xor_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_xor_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -4.0, +5.0, -6.0, +0.0));
__m256 test_mm256_xor_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_xor_ps
// CHECK: xor <8 x i32>
return _mm256_xor_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_xor_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f));
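+// The expected values for these float logic ops follow from sign-bit
+// arithmetic: ±0.0 contributes only a sign bit, so for example
+// -5.0f ^ -0.0f == +5.0f, +6.0f | -0.0f == -6.0f, and x ^ x == +0.0f.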
void test_mm256_zeroall(void) {
// CHECK-LABEL: test_mm256_zeroall
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 27da56f..1ed624c 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -9,6 +9,7 @@
#include <immintrin.h>
+#include "builtin_test_helpers.h"
// NOTE: This should match the tests in llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -97,6 +98,7 @@ __m256i test_mm256_and_si256(__m256i a, __m256i b) {
// CHECK: and <4 x i64>
return _mm256_and_si256(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_and_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, 0, 0, -1));
__m256i test_mm256_andnot_si256(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_andnot_si256
@@ -104,6 +106,7 @@ __m256i test_mm256_andnot_si256(__m256i a, __m256i b) {
// CHECK: and <4 x i64>
return _mm256_andnot_si256(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_andnot_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, 0, -1, 0));
__m256i test_mm256_avg_epu8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_avg_epu8
@@ -920,6 +923,7 @@ __m256i test_mm256_or_si256(__m256i a, __m256i b) {
// CHECK: or <4 x i64>
return _mm256_or_si256(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_or_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, -1, -1, -1));
__m256i test_mm256_packs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_packs_epi16
@@ -1336,3 +1340,4 @@ __m256i test_mm256_xor_si256(__m256i a, __m256i b) {
// CHECK: xor <4 x i64>
return _mm256_xor_si256(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_xor_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, -1, -1, 0));
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 1ebd369..39fb92d 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -2,6 +2,7 @@
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__mmask8 test_knot_mask8(__mmask8 a) {
// CHECK-LABEL: @test_knot_mask8
@@ -267,6 +268,7 @@ __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_xor_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, +5.0, -6.0, +0.0, +0.0, -6.0, +5.0, -4.0));
__m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_xor_pd
@@ -281,6 +283,7 @@ __m512 test_mm512_xor_ps (__m512 __A, __m512 __B) {
// CHECK: xor <16 x i32>
return (__m512) _mm512_xor_ps(__A, __B);
}
+TEST_CONSTEXPR(match_m512(_mm512_xor_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f, -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f));
__m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_xor_ps
@@ -303,6 +306,7 @@ __m512d test_mm512_or_pd (__m512d __A, __m512d __B) {
// CHECK: or <8 x i64>
return (__m512d) _mm512_or_pd(__A, __B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_or_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, -5.0, -6.0, +7.0, +7.0, -6.0, -5.0, -4.0));
__m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_or_pd
@@ -325,6 +329,7 @@ __m512 test_mm512_or_ps (__m512 __A, __m512 __B) {
// CHECK: or <16 x i32>
return (__m512) _mm512_or_ps(__A, __B);
}
+TEST_CONSTEXPR(match_m512(_mm512_or_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f, -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f));
__m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_or_ps
@@ -347,6 +352,7 @@ __m512d test_mm512_and_pd (__m512d __A, __m512d __B) {
// CHECK: and <8 x i64>
return (__m512d) _mm512_and_pd(__A, __B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_and_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -0.0, -0.0, +0.0, +7.0, +7.0, +0.0, -0.0, -0.0));
__m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_and_pd
@@ -369,6 +375,7 @@ __m512 test_mm512_and_ps (__m512 __A, __m512 __B) {
// CHECK: and <16 x i32>
return (__m512) _mm512_and_ps(__A, __B);
}
+TEST_CONSTEXPR(match_m512(_mm512_and_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f));
__m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_and_ps
@@ -392,6 +399,7 @@ __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) {
// CHECK: and <8 x i64>
return (__m512d) _mm512_andnot_pd(__A, __B);
}
+TEST_CONSTEXPR(match_m512d(_mm512_andnot_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0));
__m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_andnot_pd
@@ -415,6 +423,7 @@ __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) {
// CHECK: and <16 x i32>
return (__m512) _mm512_andnot_ps(__A, __B);
}
+TEST_CONSTEXPR(match_m512(_mm512_andnot_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
__m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_andnot_ps
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 84e700c..5447035 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -2811,36 +2811,42 @@ __m512i test_mm512_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i _
// CHECK: and <16 x i32>
return _mm512_and_epi32(__a, __b);
}
+TEST_CONSTEXPR(match_v16si(_mm512_and_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1));
__m512i test_mm512_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_and_epi64
// CHECK: and <8 x i64>
return _mm512_and_epi64(__a, __b);
}
+TEST_CONSTEXPR(match_v8di(_mm512_and_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, 0, -1, 0, 0, 0, -1));
__m512i test_mm512_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_or_epi32
// CHECK: or <16 x i32>
return _mm512_or_epi32(__a, __b);
}
+TEST_CONSTEXPR(match_v16si(_mm512_or_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1));
__m512i test_mm512_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_or_epi64
// CHECK: or <8 x i64>
return _mm512_or_epi64(__a, __b);
}
+TEST_CONSTEXPR(match_v8di(_mm512_or_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, -1, 0, -1, -1, -1));
__m512i test_mm512_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_xor_epi32
// CHECK: xor <16 x i32>
return _mm512_xor_epi32(__a, __b);
}
+TEST_CONSTEXPR(match_v16si(_mm512_xor_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0));
__m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) {
// CHECK-LABEL: test_mm512_xor_epi64
// CHECK: xor <8 x i64>
return _mm512_xor_epi64(__a, __b);
}
+TEST_CONSTEXPR(match_v8di(_mm512_xor_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, 0, 0, -1, -1, 0));
__m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){
// CHECK-LABEL: test_mm512_maskz_andnot_epi32
@@ -2869,6 +2875,7 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B)
return _mm512_andnot_si512(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_andnot_si512((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0));
__m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_andnot_epi32
@@ -2876,6 +2883,7 @@ __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) {
// CHECK: and <16 x i32> %{{.*}}, %{{.*}}
return _mm512_andnot_epi32(__A,__B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_andnot_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0));
__m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_andnot_epi64
@@ -2900,6 +2908,7 @@ __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) {
// CHECK: and <8 x i64> %{{.*}}, %{{.*}}
return _mm512_andnot_epi64(__A,__B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_andnot_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0));
__m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_sub_epi32
@@ -4386,6 +4395,7 @@ __m512d test_mm512_movedup_pd(__m512d __A) {
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm512_movedup_pd(__A);
}
+TEST_CONSTEXPR(match_m512d(_mm512_movedup_pd((__m512d){-1.0, +2.0, +3.0, +4.0, -5.0, -6.0, +7.0, +8.0}), -1.0, -1.0, +3.0, +3.0, -5.0, -5.0, +7.0, +7.0));
__m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_movedup_pd
@@ -8682,6 +8692,7 @@ __m512 test_mm512_movehdup_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
return _mm512_movehdup_ps(__A);
}
+TEST_CONSTEXPR(match_m512(_mm512_movehdup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), -1.0f, -1.0f, -2.0f, -2.0f, -3.0f, -3.0f, -4.0f, -4.0f, -5.0f, -5.0f, -6.0f, -6.0f, -7.0f, -7.0f, -8.0f, -8.0f));
__m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_movehdup_ps
@@ -8702,6 +8713,7 @@ __m512 test_mm512_moveldup_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
return _mm512_moveldup_ps(__A);
}
+TEST_CONSTEXPR(match_m512(_mm512_moveldup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +3.0f, +3.0f, +4.0f, +4.0f, +5.0f, +5.0f, +6.0f, +6.0f, +7.0f, +7.0f, +8.0f, +8.0f));
__m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_moveldup_ps
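[Editor's note on the two shuffle checks above: moveldup duplicates each even lane and movehdup each odd lane, which is exactly what the shufflevector index lists encode. A hedged four-lane sketch, assuming Clang's vector extensions; the names here are hypothetical, not the test's:

typedef float v4f_sketch __attribute__((__vector_size__(16)));

// moveldup on four lanes maps {a, b, c, d} to {a, a, c, c};
// movehdup would map the same input to {b, b, d, d}.
constexpr bool check_moveldup4(v4f_sketch v) {
  v4f_sketch r = {v[0], v[0], v[2], v[2]};
  return r[0] == v[0] && r[1] == v[0] && r[2] == v[2] && r[3] == v[2];
}

constexpr v4f_sketch kMoveldupIn = {+1.0f, -1.0f, +2.0f, -2.0f};
static_assert(check_moveldup4(kMoveldupIn), "even lanes duplicated");
]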
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index 22a87ce..f719694 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -83,6 +83,22 @@ constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int f,
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}
+constexpr bool match_v16hi(__m256i _v, short a, short b, short c, short d, short e, short f, short g, short h, short i, short j, short k, short l, short m, short n, short o, short p) {
+ __v16hi v = (__v16hi)_v;
+ return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_v32qi(__m256i _v, char __b00, char __b01, char __b02, char __b03, char __b04, char __b05, char __b06, char __b07,
+ char __b08, char __b09, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15,
+ char __b16, char __b17, char __b18, char __b19, char __b20, char __b21, char __b22, char __b23,
+ char __b24, char __b25, char __b26, char __b27, char __b28, char __b29, char __b30, char __b31) {
+ __v32qi v = (__v32qi)_v;
+ return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 && v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] == __b07 &&
+ v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 && v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] == __b15 &&
+ v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 && v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] == __b23 &&
+ v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 && v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] == __b31;
+}
+
constexpr bool match_m512(__m512 v, float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p) {
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
}
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 52cbe45..30e2cfe 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -34,24 +34,28 @@ __m64 test_mm_add_pi8(__m64 a, __m64 b) {
// CHECK: add <8 x i8> {{%.*}}, {{%.*}}
return _mm_add_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_add_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-18, +16, -14, +12, -10, +8, +6, -4)), -21, +18, -15, +12, -9, +6, +9, -8));
__m64 test_mm_add_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi16
// CHECK: add <4 x i16> {{%.*}}, {{%.*}}
return _mm_add_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_add_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -9, +6, +9, -8));
__m64 test_mm_add_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_pi32
// CHECK: add <2 x i32> {{%.*}}, {{%.*}}
return _mm_add_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_add_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-9, +8}), -4, +5));
__m64 test_mm_add_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_add_si64
// CHECK: add i64 {{%.*}}, {{%.*}}
return _mm_add_si64(a, b);
}
+TEST_CONSTEXPR(match_v1di(_mm_add_si64((__m64)(__v1di){+42}, (__m64)(__v1di){-100}), -58));
__m64 test_mm_adds_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pi8
@@ -88,6 +92,7 @@ __m64 test_mm_and_si64(__m64 a, __m64 b) {
// CHECK: and <1 x i64> {{%.*}}, {{%.*}}
return _mm_and_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_and_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, 0, 0, -1));
__m64 test_mm_andnot_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_andnot_si64
@@ -95,6 +100,7 @@ __m64 test_mm_andnot_si64(__m64 a, __m64 b) {
// CHECK: and <1 x i64> [[TMP]], {{%.*}}
return _mm_andnot_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_andnot_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, 0, -1, 0));
__m64 test_mm_avg_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_avg_pu8
@@ -114,6 +120,7 @@ __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
return _mm_cmpeq_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_cmpeq_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-3, -2, +1, 0, -1, -2, -3, -4)), -1, 0, 0, -1, 0, -1, 0, -1));
__m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi16
@@ -121,6 +128,7 @@ __m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
return _mm_cmpeq_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_cmpeq_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, -1, +3, +4}), 0, 0, -1, 0));
__m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpeq_pi32
@@ -128,6 +136,7 @@ __m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
return _mm_cmpeq_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_cmpeq_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-5, -3}), 0, -1));
__m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi8
@@ -135,6 +144,7 @@ __m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
return _mm_cmpgt_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_cmpgt_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-3, -2, +1, 0, -1, -2, -3, -4)), 0, -1, 0, 0, -1, 0, -1, 0));
__m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi16
@@ -142,6 +152,7 @@ __m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
return _mm_cmpgt_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_cmpgt_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, -1, +3, +4}), -1, 0, 0, 0));
__m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_cmpgt_pi32
@@ -149,6 +160,7 @@ __m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
// CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
return _mm_cmpgt_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_cmpgt_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-5, -3}), -1, 0));
__m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
// CHECK-LABEL: test_mm_cvt_pi2ps
@@ -210,12 +222,14 @@ __m64 test_mm_cvtsi32_si64(int a) {
// CHECK: insertelement <2 x i32>
return _mm_cvtsi32_si64(a);
}
+TEST_CONSTEXPR(match_v2si(_mm_cvtsi32_si64(-127), -127, 0));
int test_mm_cvtsi64_si32(__m64 a) {
// CHECK-LABEL: test_mm_cvtsi64_si32
// CHECK: extractelement <2 x i32>
return _mm_cvtsi64_si32(a);
}
+TEST_CONSTEXPR(_mm_cvtsi64_si32((__m64)(__v4hi){-2, 0, -1, -1}) == 65534);
__m64 test_mm_cvttpd_pi32(__m128d a) {
// CHECK-LABEL: test_mm_cvttpd_pi32
@@ -240,11 +254,13 @@ __m64 test_m_from_int(int a) {
// CHECK: insertelement <2 x i32>
return _m_from_int(a);
}
+TEST_CONSTEXPR(match_v2si(_m_from_int(255), 255, 0));
__m64 test_m_from_int64(long long a) {
// CHECK-LABEL: test_m_from_int64
return _m_from_int64(a);
}
+TEST_CONSTEXPR(match_v1di(_m_from_int64(-65536), -65536LL));
__m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi16
@@ -367,12 +383,14 @@ __m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
// CHECK: mul <4 x i16> {{%.*}}, {{%.*}}
return _mm_mullo_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_mullo_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -10, -16, +18, +16));
__m64 test_mm_or_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_or_si64
// CHECK: or <1 x i64> {{%.*}}, {{%.*}}
return _mm_or_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_or_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, -1, -1, -1));
__m64 test_mm_packs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_packs_pi16
@@ -644,24 +662,28 @@ __m64 test_mm_sub_pi8(__m64 a, __m64 b) {
// CHECK: sub <8 x i8> {{%.*}}, {{%.*}}
return _mm_sub_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_sub_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-18, +16, -14, +12, -10, +8, +6, -4)), +15, -14, +13, -12, +11, -10, -3, 0));
__m64 test_mm_sub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi16
// CHECK: sub <4 x i16> {{%.*}}, {{%.*}}
return _mm_sub_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sub_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), +11, -10, -3, 0));
__m64 test_mm_sub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_pi32
// CHECK: sub <2 x i32> {{%.*}}, {{%.*}}
return _mm_sub_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sub_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-9, +8}), +14, -11));
__m64 test_mm_sub_si64(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sub_si64
// CHECK: sub i64 {{%.*}}, {{%.*}}
return _mm_sub_si64(a, b);
}
+TEST_CONSTEXPR(match_v1di(_mm_sub_si64((__m64)(__v1di){+42}, (__m64)(__v1di){-100}), +142));
__m64 test_mm_subs_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pi8
@@ -692,11 +714,13 @@ int test_m_to_int(__m64 a) {
// CHECK: extractelement <2 x i32>
return _m_to_int(a);
}
+TEST_CONSTEXPR(_m_to_int((__m64)(__v4hi){0, -2, -1, -1}) == -131072);
long long test_m_to_int64(__m64 a) {
// CHECK-LABEL: test_m_to_int64
return _m_to_int64(a);
}
+TEST_CONSTEXPR(_m_to_int64((__m64)(__v4hi){0, -2, 0, -1}) == -281470681874432LL);
__m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_unpackhi_pi8
@@ -739,3 +763,4 @@ __m64 test_mm_xor_si64(__m64 a, __m64 b) {
// CHECK: xor <1 x i64> {{%.*}}, {{%.*}}
return _mm_xor_si64(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_xor_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, -1, -1, 0));
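[Editor's note for anyone checking the _m_to_int/_m_to_int64 constants above: the halfwords pack little-endian, so {0, -2, -1, -1} puts 0xFFFE in bits 16..31 (low dword 0xFFFE0000, i.e. -131072) and {0, -2, 0, -1} packs to 0xFFFF0000FFFE0000. A worked check of that arithmetic:

// Little-endian halfword packing: {0, -2, 0, -1} -> 0xFFFF0000FFFE0000,
// i.e. -(2^48) + (2^32 - 2^17) when read as a signed 64-bit value.
static_assert((long long)0xFFFF0000FFFE0000ULL == -281470681874432LL, "matches the test constant");
// The 32-bit case {0, -2, ...}: low dword 0xFFFE0000 is -131072.
static_assert((int)0xFFFE0000u == -131072, "matches the _m_to_int constant");
]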
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 04df59e..affb6e7 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -96,6 +96,7 @@ __m128i test_mm_and_si128(__m128i A, __m128i B) {
// CHECK: and <2 x i64>
return _mm_and_si128(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_and_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, 0, 0, -1));
__m128d test_mm_andnot_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_andnot_pd
@@ -111,6 +112,7 @@ __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
// CHECK: and <2 x i64>
return _mm_andnot_si128(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_andnot_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, 0, -1, 0));
__m128i test_mm_avg_epu8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_avg_epu8
@@ -963,6 +965,7 @@ __m128i test_mm_or_si128(__m128i A, __m128i B) {
// CHECK: or <2 x i64> %{{.*}}, %{{.*}}
return _mm_or_si128(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_or_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, -1, -1, -1));
__m128i test_mm_packs_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_packs_epi16
@@ -1831,3 +1834,4 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) {
// CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
return _mm_xor_si128(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_xor_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, -1, -1, 0));
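[Editor's note: the four-lane rows added in this file all follow from per-lane boolean identities — and is a & b, or is a | b, xor is a ^ b, and andnot is ~a & b. A compact check of the andnot row, which is the operand order people most often get backwards:

// Lanes a = {0, -1, 0, -1}, b = {0, 0, -1, -1}: ~a & b = {0, 0, -1, 0}.
static_assert((~0 & 0) == 0 && (~-1 & 0) == 0 && (~0 & -1) == -1 && (~-1 & -1) == 0,
              "andnot is ~a & b, not a & ~b");
]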
diff --git a/clang/test/CodeGen/cfi-icall-generalize.c b/clang/test/CodeGen/cfi-icall-generalize.c
index 2011889..0af17e5 100644
--- a/clang/test/CodeGen/cfi-icall-generalize.c
+++ b/clang/test/CodeGen/cfi-icall-generalize.c
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=UNGENERALIZED %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -fsanitize-cfi-icall-generalize-pointers -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GENERALIZED %s
-// Test that const char* is generalized to const ptr and that const char** is
+// Test that const char* is generalized to const ptr and that char** is
// generalized to ptr
// CHECK: define{{.*}} ptr @f({{.*}} !type [[TYPE:![0-9]+]] !type [[TYPE_GENERALIZED:![0-9]+]]
diff --git a/clang/test/CodeGenCXX/dynamic-cast-exact-disabled.cpp b/clang/test/CodeGenCXX/dynamic-cast-exact-disabled.cpp
index 19c2a9b..3156e1b 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-exact-disabled.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-exact-disabled.cpp
@@ -14,3 +14,19 @@ B *exact(A *a) {
// EXACT-NOT: call {{.*}} @__dynamic_cast
return dynamic_cast<B*>(a);
}
+
+struct C {
+ virtual ~C();
+};
+
+struct D final : private C {};
+
+// CHECK-LABEL: @_Z5exactP1C
+D *exact(C *a) {
+ // INEXACT: call {{.*}} @__dynamic_cast
+ // EXACT: entry:
+ // EXACT-NEXT: ret ptr null
+ return dynamic_cast<D*>(a);
+}
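[Editor's note on why the new test can expect `ret ptr null` with no runtime call: D is final, so the only dynamic type that could satisfy the cast is D itself, and D's base C is private, which dynamic_cast may not traverse. A standalone illustration of the rule, with hypothetical types independent of the test above:

struct Base { virtual ~Base() {} };
struct Leaf final : private Base {};

// For every Base *b this returns null: Leaf is the only candidate dynamic
// type, and its Base subobject is not public, so the cast always fails.
Leaf *never_succeeds(Base *b) { return dynamic_cast<Leaf *>(b); }
]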
diff --git a/clang/test/CodeGenCXX/dynamic-cast-exact.cpp b/clang/test/CodeGenCXX/dynamic-cast-exact.cpp
index 86e1965..588d808 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-exact.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-exact.cpp
@@ -9,6 +9,7 @@ struct E : A { int e; };
struct F : virtual A { int f; };
struct G : virtual A { int g; };
struct H final : C, D, E, F, G { int h; };
+struct H1 final : C, private D { int h1; };
// CHECK-LABEL: @_Z7inexactP1A
C *inexact(A *a) {
@@ -77,10 +78,49 @@ H *exact_multi(A *a) {
return dynamic_cast<H*>(a);
}
+// CHECK-LABEL: @_Z19exact_invalid_multiP1D
+H1 *exact_invalid_multi(D* d) {
+ // CHECK: entry:
+ // CHECK-NEXT: %d.addr = alloca ptr
+ // CHECK-NEXT: store ptr %d, ptr %d.addr
+ // CHECK-NEXT: load ptr, ptr %d.addr
+ // CHECK-NEXT: ret ptr null
+ return dynamic_cast<H1*>(d);
+}
+
+// CHECK-LABEL: @_Z19exact_invalid_multiR1D
+H1 &exact_invalid_multi(D& d) {
+ // CHECK: entry:
+ // CHECK-NEXT: %d.addr = alloca ptr
+ // CHECK-NEXT: store ptr %d, ptr %d.addr
+ // CHECK-NEXT: load ptr, ptr %d.addr
+ // CHECK-NEXT: call void @__cxa_bad_cast()
+ // CHECK-NEXT: unreachable
+ // CHECK: dynamic_cast.unreachable:
+ // CHECK-NEXT: ret ptr poison
+ return dynamic_cast<H1&>(d);
+}
+
+namespace GH137518 {
+ class base { virtual void fn() = 0; };
+ class test final : base { virtual void fn() { } };
+ test* new_test() { return new test(); }
+
+ // CHECK-LABEL: @_ZN8GH1375184castEPNS_4baseE(
+ test* cast(base* b) {
+ // CHECK: entry:
+ // CHECK-NEXT: %b.addr = alloca ptr
+ // CHECK-NEXT: store ptr %b, ptr %b.addr
+ // CHECK-NEXT: load ptr, ptr %b.addr
+ // CHECK-NEXT: ret ptr null
+ return dynamic_cast<test*>(b);
+ }
+}
+
namespace GH64088 {
// Ensure we mark the B vtable as used here, because we're going to emit a
// reference to it.
- // CHECK: define {{.*}} @_ZN7GH640881BD0
+ // CHECK: define {{.*}} void @_ZN7GH640881BD0Ev(
struct A { virtual ~A(); };
struct B final : A { virtual ~B() = default; };
B *cast(A *p) { return dynamic_cast<B*>(p); }
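[Editor's note: the "exact" strategy exercised throughout this file replaces the __dynamic_cast runtime call with a vtable-pointer comparison when the destination class is final. In source terms it is roughly a typeid check plus a subobject adjustment; a hedged sketch with hypothetical type names, not the emitted IR:

#include <typeinfo>

struct A2 { virtual ~A2() {} };
struct B2 final : A2 {};

// Approximately what the optimized lowering proves: the cast succeeds
// exactly when the most-derived type of *p is B2.
B2 *exact_sketch(A2 *p) {
  return (p && typeid(*p) == typeid(B2)) ? static_cast<B2 *>(p) : nullptr;
}
]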
diff --git a/clang/test/CodeGenCXX/mangle-class-nttp.cpp b/clang/test/CodeGenCXX/mangle-class-nttp.cpp
index 12c81f2..536592c 100644
--- a/clang/test/CodeGenCXX/mangle-class-nttp.cpp
+++ b/clang/test/CodeGenCXX/mangle-class-nttp.cpp
@@ -27,12 +27,14 @@ template void f<B{nullptr}>();
// CHECK: define weak_odr void @_Z1fIXtl1BLPKi32EEEEvv(
// MSABI: define {{.*}} @"??$f@$2UB@@PEBH0CA@H0A@@@@YAXXZ"
template void f<B{fold((int*)32)}>();
-#ifndef _WIN32
-// FIXME: On MS ABI, we mangle this the same as nullptr, despite considering a
-// null pointer and zero bitcast to a pointer to be distinct pointer values.
-// CHECK: define weak_odr void @_Z1fIXtl1BrcPKiLi0EEEEvv(
-template void f<B{fold(reinterpret_cast<int*>(0))}>();
-#endif
+
+// CHECK: define weak_odr void @_Z1fIXtl1BLPKi0ELi2EEEEvv(
+// MSABI: define {{.*}} @"??$f@$2UB@@PEBH0A@H01@@@YAXXZ"(
+template void f<B{fold(reinterpret_cast<int*>(0)), 2}>();
+
+// CHECK: define weak_odr void @_Z1fIXtl1BLPKi12EEEEvv(
+// MSABI: define {{.*}} @"??$f@$2UB@@PEBH0M@H0A@@@@YAXXZ"(
+template void f<B{fold(reinterpret_cast<int*>(12))}>();
// Pointers to subobjects.
struct Nested { union { int k; int arr[2]; }; } nested[2];
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index ba23bc2..6206020 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -241,7 +241,7 @@
// Check --offload-compress --offload-jobs=N does not cause warning.
// RUN: %clang -### -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
// RUN: --offload-arch=gfx1100 --offload-compress --offload-host-only -M %s \
-// RUN: --offload-jobs=4
+// RUN: --offload-jobs=4 --offload-new-driver
// Check --offload-jobs=N option.
diff --git a/clang/test/SemaCXX/constexpr-value-init.cpp b/clang/test/SemaCXX/constexpr-value-init.cpp
index 3314174..18fa9cf 100644
--- a/clang/test/SemaCXX/constexpr-value-init.cpp
+++ b/clang/test/SemaCXX/constexpr-value-init.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -Wno-uninitialized -std=c++17 -fsyntax-only -verify
+// RUN: %clang_cc1 %s -Wno-uninitialized -std=c++17 -fsyntax-only -verify -fexperimental-new-constant-interpreter
struct A {
constexpr A() : a(b + 1), b(a + 1) {} // expected-note 5{{outside its lifetime}}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 6ae329a..765dcbc 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -4,8 +4,16 @@
// RUN: -verify %s -x objective-c++
// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
// RUN: -verify %s
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: -verify %s -DTEST_STD_NS
typedef struct {} FILE;
+typedef unsigned int size_t;
+
+#ifdef TEST_STD_NS
+namespace std {
+#endif
+
void memcpy();
void __asan_memcpy();
void strcpy();
@@ -25,6 +33,11 @@ int sscanf(const char * buffer, const char * format, ... );
int wprintf(const wchar_t* format, ... );
int __asan_printf();
+#ifdef TEST_STD_NS
+} // namespace std
+using namespace std;
+#endif
+
namespace std {
template< class InputIt, class OutputIt >
OutputIt copy( InputIt first, InputIt last,
@@ -64,10 +77,6 @@ namespace std {
typedef basic_string_view<char> string_view;
typedef basic_string_view<wchar_t> wstring_view;
-
- // C function under std:
- void memcpy();
- void strcpy();
}
void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
@@ -77,14 +86,16 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
aligned_char_ptr_t cp;
memcpy(); // expected-warning{{function 'memcpy' is unsafe}}
- std::memcpy(); // expected-warning{{function 'memcpy' is unsafe}}
__builtin_memcpy(p, q, 64); // expected-warning{{function '__builtin_memcpy' is unsafe}}
__builtin___memcpy_chk(p, q, 8, 64); // expected-warning{{function '__builtin___memcpy_chk' is unsafe}}
__asan_memcpy(); // expected-warning{{function '__asan_memcpy' is unsafe}}
strcpy(); // expected-warning{{function 'strcpy' is unsafe}}
- std::strcpy(); // expected-warning{{function 'strcpy' is unsafe}}
strcpy_s(); // expected-warning{{function 'strcpy_s' is unsafe}}
wcscpy_s(); // expected-warning{{function 'wcscpy_s' is unsafe}}
+#ifdef TEST_STD_NS
+ std::strcpy(); // expected-warning{{function 'strcpy' is unsafe}}
+ std::memcpy(); // expected-warning{{function 'memcpy' is unsafe}}
+#endif
/* Test printfs */
fprintf((FILE*)p, "%s%d", p, *p); // expected-warning{{function 'fprintf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
@@ -181,13 +192,59 @@ void ff(char * p, char * q, std::span<char> s, std::span<char> s2) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-libc-call"
memcpy();
- std::memcpy();
__builtin_memcpy(p, q, 64);
__builtin___memcpy_chk(p, q, 8, 64);
__asan_memcpy();
strcpy();
+#ifdef TEST_STD_NS
std::strcpy();
+ std::memcpy();
+#endif
strcpy_s();
wcscpy_s();
#pragma clang diagnostic pop
}
+
+// Functions not in the global scope or the std:: namespace are not libc
+// functions, regardless of their names:
+struct StrBuff
+{
+ void strcpy();
+ void strcpy(char* dst);
+ void memcpy(void *dst, const void *src, size_t size);
+};
+
+namespace NS {
+ void strcpy();
+ void strcpy(char* dst);
+ void memcpy(void *dst, const void *src, size_t size);
+}
+
+namespace std {
+  // Class methods, even in the std namespace, cannot be libc functions:
+ struct LibC
+ {
+ void strcpy();
+ void strcpy(char* dst);
+ void memcpy(void *dst, const void *src, size_t size);
+ };
+}
+
+void test(StrBuff& str)
+{
+ char buff[64];
+ str.strcpy();
+ str.strcpy(buff);
+ str.memcpy(buff, buff, 64);
+ NS::strcpy();
+ NS::strcpy(buff);
+ NS::memcpy(buff, buff, 64);
+
+ std::LibC LibC;
+
+ LibC.strcpy();
+ LibC.strcpy(buff);
+ LibC.memcpy(buff, buff, 64);
+}
diff --git a/clang/test/SemaOpenACC/private_firstprivate_reduction_required_ops.cpp b/clang/test/SemaOpenACC/private_firstprivate_reduction_required_ops.cpp
index 4644bde..38eb4e4 100644
--- a/clang/test/SemaOpenACC/private_firstprivate_reduction_required_ops.cpp
+++ b/clang/test/SemaOpenACC/private_firstprivate_reduction_required_ops.cpp
@@ -37,10 +37,12 @@ struct ImplicitDelDtor {
};
struct DeletedCopy {
+ DeletedCopy();
DeletedCopy(const DeletedCopy&) = delete;
};
struct DefaultedCopy {
+ DefaultedCopy();
DefaultedCopy(const DefaultedCopy&) = default;
};
struct UserCopy {
@@ -273,3 +275,20 @@ void firstprivate_arrays() {
#pragma acc parallel firstprivate(UDCopyArr)
;
}
+
+template<unsigned I>
+void non_const_array_templ() {
+ int CArr[I];
+
+#pragma acc parallel firstprivate(CArr)
+ ;
+}
+
+void non_const_arrays(int I) {
+ non_const_array_templ<5>();
+
+ int NCArr[I];
+ // expected-warning@+1{{variable of array type 'int[I]' referenced in OpenACC 'firstprivate' clause does not have constant bounds; initialization will happen after decay to pointer}}
+#pragma acc parallel firstprivate(NCArr)
+ ;
+}
diff --git a/clang/test/SemaOpenACC/sub-array.cpp b/clang/test/SemaOpenACC/sub-array.cpp
index 355ac5e..ec92975 100644
--- a/clang/test/SemaOpenACC/sub-array.cpp
+++ b/clang/test/SemaOpenACC/sub-array.cpp
@@ -61,13 +61,15 @@ void Func(int i, int j) {
#pragma acc parallel private(ptr[3:])
while (true);
- // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is an array of unknown bound}}
+ // expected-error@+2{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is an array of unknown bound}}
+ // expected-warning@+1{{variable of array type 'int[i]' referenced in OpenACC 'private' clause does not have constant bounds; initialization will happen after decay to pointer}}
#pragma acc parallel private(VLA[3:])
while (true);
#pragma acc parallel private(ptr[:3])
while (true);
+ // expected-warning@+1{{variable of array type 'int[i]' referenced in OpenACC 'private' clause does not have constant bounds; initialization will happen after decay to pointer}}
#pragma acc parallel private(VLA[:3])
while (true);
@@ -159,11 +161,13 @@ void Templ(int i){
// expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is not an array}}
#pragma acc parallel private(ptr[Conv:])
while (true);
- // expected-error@+1{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is an array of unknown bound}}
+ // expected-error@+2{{OpenACC sub-array length is unspecified and cannot be inferred because the subscripted value is an array of unknown bound}}
+ // expected-warning@+1{{variable of array type 'int[i]' referenced in OpenACC 'private' clause does not have constant bounds; initialization will happen after decay to pointer}}
#pragma acc parallel private(VLA[Conv:])
while (true);
#pragma acc parallel private(ptr[:Conv])
while (true);
+ // expected-warning@+1{{variable of array type 'int[i]' referenced in OpenACC 'private' clause does not have constant bounds; initialization will happen after decay to pointer}}
#pragma acc parallel private(VLA[:Conv])
while (true);
diff --git a/clang/test/SemaOpenCLCXX/amdgpu-nullptr.clcpp b/clang/test/SemaOpenCLCXX/amdgpu-nullptr.clcpp
new file mode 100644
index 0000000..5bc147c2
--- /dev/null
+++ b/clang/test/SemaOpenCLCXX/amdgpu-nullptr.clcpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple amdgcn -cl-std=clc++ -verify %s
+
+// expected-no-diagnostics
+
+#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
+static_assert(nullptr != fold(reinterpret_cast<private int*>(0)));
+
+static_assert(nullptr == (private int *)0);
+
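[Editor's note on what this new test pins down, as I read the two asserts: on amdgcn the private address space uses a nonzero encoding for null, so converting the null pointer constant — `(private int *)0` — still yields the true null value and compares equal to nullptr, while the opaquely-folded `reinterpret_cast<private int*>(0)` produces the literal zero bit pattern, which is a valid non-null pointer there and so compares unequal.]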
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 95cda67..9493edf 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2895,8 +2895,10 @@ void OpenACCClauseEnqueue::VisitDeviceClause(const OpenACCDeviceClause &C) {
void OpenACCClauseEnqueue::VisitFirstPrivateClause(
const OpenACCFirstPrivateClause &C) {
VisitVarList(C);
- for (VarDecl *V : C.getInitRecipes())
- Visitor.AddDecl(V);
+ for (const OpenACCFirstPrivateRecipe &R : C.getInitRecipes()) {
+ Visitor.AddDecl(R.RecipeDecl);
+ Visitor.AddDecl(R.InitFromTemporary);
+ }
}
void OpenACCClauseEnqueue::VisitPresentClause(const OpenACCPresentClause &C) {
@@ -2976,9 +2978,7 @@ void OpenACCClauseEnqueue::VisitIndependentClause(
const OpenACCIndependentClause &C) {}
void OpenACCClauseEnqueue::VisitSeqClause(const OpenACCSeqClause &C) {}
void OpenACCClauseEnqueue::VisitNoHostClause(const OpenACCNoHostClause &C) {}
-void OpenACCClauseEnqueue::VisitBindClause(const OpenACCBindClause &C) {
- assert(false && "TODO ERICH");
-}
+void OpenACCClauseEnqueue::VisitBindClause(const OpenACCBindClause &C) {}
void OpenACCClauseEnqueue::VisitFinalizeClause(const OpenACCFinalizeClause &C) {
}
void OpenACCClauseEnqueue::VisitIfPresentClause(
@@ -4720,8 +4720,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) {
std::string strRef(StrE->getString().str());
result->EvalData.stringVal = new char[strRef.size() + 1];
- strncpy((char *)result->EvalData.stringVal, strRef.c_str(),
- strRef.size());
+ strncpy(result->EvalData.stringVal, strRef.c_str(), strRef.size());
result->EvalData.stringVal[strRef.size()] = '\0';
return result.release();
}
@@ -4741,7 +4740,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) {
std::string strRef(StrE->getString().str());
result->EvalData.stringVal = new char[strRef.size() + 1];
- strncpy((char *)result->EvalData.stringVal, strRef.c_str(), strRef.size());
+ strncpy(result->EvalData.stringVal, strRef.c_str(), strRef.size());
result->EvalData.stringVal[strRef.size()] = '\0';
return result.release();
}
@@ -4760,7 +4759,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) {
result->EvalType = CXEval_CFStr;
result->EvalData.stringVal = new char[strLiteral.size() + 1];
- strncpy((char *)result->EvalData.stringVal, strLiteral.c_str(),
+ strncpy(result->EvalData.stringVal, strLiteral.c_str(),
strLiteral.size());
result->EvalData.stringVal[strLiteral.size()] = '\0';
return result.release();
@@ -4785,7 +4784,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) {
std::string strLiteral(S->getString().str());
result->EvalType = CXEval_CFStr;
result->EvalData.stringVal = new char[strLiteral.size() + 1];
- strncpy((char *)result->EvalData.stringVal, strLiteral.c_str(),
+ strncpy(result->EvalData.stringVal, strLiteral.c_str(),
strLiteral.size());
result->EvalData.stringVal[strLiteral.size()] = '\0';
return result.release();
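[Editor's note on the pattern these hunks touch: strncpy does not NUL-terminate when the source fills the buffer, which is why each call is followed by an explicit terminator store. A minimal C sketch of the same copy-and-terminate idiom — a hypothetical helper, not libclang's API:

#include <stdlib.h>
#include <string.h>

// Duplicate exactly n bytes of s and NUL-terminate, mirroring the
// allocate / strncpy / terminate sequence in evaluateExpr above.
static char *copy_and_terminate(const char *s, size_t n) {
  char *out = (char *)malloc(n + 1);
  if (!out)
    return NULL;
  strncpy(out, s, n);
  out[n] = '\0';
  return out;
}
]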