53 files changed, 1085 insertions, 2486 deletions
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 73aaaad..92fc938 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -451,6 +451,7 @@ Bug Fixes to Attribute Support
   ``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
 - Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075)
 - Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905)
+- Fix handling of parameter indexes when an attribute is applied to a C++23 explicit object member function.
 
 Bug Fixes to C++ Support
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py
index 46b7bb7..5db6826 100755
--- a/clang/docs/tools/dump_ast_matchers.py
+++ b/clang/docs/tools/dump_ast_matchers.py
@@ -6,11 +6,8 @@
 import collections
 import re
 import os
+from urllib.request import urlopen
 
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib2 import urlopen
 
 CLASS_INDEX_PAGE_URL = "https://clang.llvm.org/doxygen/classes.html"
 try:
diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h
index ce273c1..14d7caa 100644
--- a/clang/include/clang/AST/Attr.h
+++ b/clang/include/clang/AST/Attr.h
@@ -16,6 +16,7 @@
 #include "clang/AST/ASTFwd.h"
 #include "clang/AST/AttrIterator.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/AttrKinds.h"
 #include "clang/Basic/AttributeCommonInfo.h"
@@ -327,8 +328,8 @@ public:
   ParamIdx(unsigned Idx, const Decl *D)
       : Idx(Idx), HasThis(false), IsValid(true) {
     assert(Idx >= 1 && "Idx must be one-origin");
-    if (const auto *FD = dyn_cast<FunctionDecl>(D))
-      HasThis = FD->isCXXInstanceMember();
+    if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(D))
+      HasThis = MethodDecl->isImplicitObjectMemberFunction();
   }
 
   /// A type into which \c ParamIdx can be serialized.
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td
index 275278c..062060e 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.td
+++ b/clang/include/clang/Basic/BuiltinsX86_64.td
@@ -239,57 +239,6 @@ let Features = "amx-complex", Attributes = [NoThrow] in {
   def tcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
 }
 
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz1rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">;
-}
-
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def ttransposed_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
-}
-
-let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpbf16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-}
-
-let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-}
-
-let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
-  def ttcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-  def ttcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-  def tconjtcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-  def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">;
-}
-
 let Features = "amx-avx512,avx10.2", Attributes = [NoThrow] in {
   def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
   def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
@@ -303,10 +252,6 @@ let Features = "amx-tf32", Attributes = [NoThrow] in {
   def tmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
 }
 
-let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in {
-  def ttmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
-}
-
 let Features = "amx-fp8", Attributes = [NoThrow] in {
   def tdpbf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
   def tdpbhf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">;
@@ -321,13 +266,6 @@ let Features = "amx-tile", Attributes = [NoThrow] in {
   def tilezero : X86Builtin<"void(unsigned char)">;
 }
 
-let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz0rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-}
-
 let Features = "amx-movrs", Attributes = [NoThrow] in {
   def tileloaddrs64 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
   def tileloaddrst164 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
@@ -359,29 +297,6 @@ let Features = "amx-complex", Attributes = [NoThrow] in {
   def tcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
 }
 
-let Features = "amx-transpose", Attributes = [NoThrow] in {
-  def t2rpntlvwz0 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz0t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def t2rpntlvwz1t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">;
-  def ttransposed : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
-}
-
-let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpbf16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-}
-
-let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in {
-  def ttdpfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-}
-
-let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {
-  def ttcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-  def ttcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-  def tconjtcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-  def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">;
-}
-
 let Features = "amx-avx512,avx10.2", Attributes = [NoThrow] in {
   def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
   def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
@@ -406,10 +321,6 @@ let Features = "amx-tf32", Attributes = [NoThrow] in {
   def tmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
 }
 
-let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in {
-  def ttmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">;
-}
-
 let Features = "prefetchi", Attributes = [NoThrow, Const] in {
   def prefetchi : X86Builtin<"void(void const *, unsigned int)">;
 }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index cb5cb88..7f33f31 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6695,8 +6695,6 @@ def mamx_tf32 : Flag<["-"], "mamx-tf32">, Group<m_x86_Features_Group>;
 def mno_amx_tf32 : Flag<["-"], "mno-amx-tf32">, Group<m_x86_Features_Group>;
 def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;
 def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>;
-def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>;
-def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>;
 def mamx_movrs: Flag<["-"], "mamx-movrs">, Group<m_x86_Features_Group>;
 def mno_amx_movrs: Flag<["-"], "mno-amx-movrs">, Group<m_x86_Features_Group>;
 def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>;
diff --git a/clang/include/clang/Sema/Attr.h b/clang/include/clang/Sema/Attr.h
index 3f0b102..5836231 100644
--- a/clang/include/clang/Sema/Attr.h
+++ b/clang/include/clang/Sema/Attr.h
@@ -123,6 +123,12 @@ inline bool isInstanceMethod(const Decl *D) {
   return false;
 }
 
+inline bool hasImplicitObjectParameter(const Decl *D) {
+  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(D))
+    return MethodDecl->isImplicitObjectMemberFunction();
+  return false;
+}
+
 /// Diagnose mutually exclusive attributes when present on a given
 /// declaration. Returns true if diagnosed.
 template <typename AttrTy>
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 52904c72..c67ed99 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2608,13 +2608,13 @@ public:
   };
 
   /// Given a function and its FormatAttr or FormatMatchesAttr info, attempts to
-  /// populate the FomatStringInfo parameter with the attribute's correct
+  /// populate the FormatStringInfo parameter with the attribute's correct
   /// format_idx and firstDataArg. Returns true when the format fits the
   /// function and the FormatStringInfo has been populated.
   static bool getFormatStringInfo(const Decl *Function, unsigned FormatIdx,
                                   unsigned FirstArg, FormatStringInfo *FSI);
   static bool getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
-                                  bool IsCXXMember, bool IsVariadic,
+                                  bool HasImplicitThisParam, bool IsVariadic,
                                   FormatStringInfo *FSI);
 
   // Used by C++ template instantiation.
@@ -5119,7 +5119,7 @@ public:
     // In C++ the implicit 'this' function parameter also counts.
     // Parameters are counted from one.
     bool HP = hasFunctionProto(D);
-    bool HasImplicitThisParam = isInstanceMethod(D);
+    bool HasImplicitThisParam = hasImplicitObjectParameter(D);
     bool IV = HP && isFunctionOrMethodVariadic(D);
     unsigned NumParams =
         (HP ? getFunctionOrMethodNumParams(D) : 0) + HasImplicitThisParam;
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 846b240..d2eb9c5 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -445,27 +445,17 @@ public:
     LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
     IntMaxType = SignedLong;
     Int64Type = SignedLong;
-    std::string DataLayout;
 
     if (Triple.isOSAIX()) {
       // TODO: Set appropriate ABI for AIX platform.
-      DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
       LongDoubleWidth = 64;
       LongDoubleAlign = DoubleAlign = 32;
       LongDoubleFormat = &llvm::APFloat::IEEEdouble();
-    } else if ((Triple.getArch() == llvm::Triple::ppc64le)) {
-      DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
+    } else if ((Triple.getArch() == llvm::Triple::ppc64le) ||
+               Triple.isPPC64ELFv2ABI()) {
       ABI = "elfv2";
     } else {
-      DataLayout = "E-m:e";
-      if (Triple.isPPC64ELFv2ABI()) {
-        ABI = "elfv2";
-        DataLayout += "-Fn32";
-      } else {
-        ABI = "elfv1";
-        DataLayout += "-Fi64";
-      }
-      DataLayout += "-i64:64-i128:128-n32:64";
+      ABI = "elfv1";
     }
 
     if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() || Triple.isMusl()) {
@@ -473,14 +463,12 @@ public:
       LongDoubleFormat = &llvm::APFloat::IEEEdouble();
     }
 
-    if (Triple.isOSAIX() || Triple.isOSLinux())
-      DataLayout += "-S128-v256:256:256-v512:512:512";
-    resetDataLayout(DataLayout);
-
     // Newer PPC64 instruction sets support atomics up to 16 bytes.
     MaxAtomicPromoteWidth = 128;
     // Baseline PPC64 supports inlining atomics up to 8 bytes.
     MaxAtomicInlineWidth = 64;
+
+    calculateDataLayout();
   }
 
   void setMaxAtomicWidth() override {
@@ -495,10 +483,33 @@ public:
     return TargetInfo::CharPtrBuiltinVaList;
   }
 
+  void calculateDataLayout() {
+    std::string DataLayout;
+
+    if (getTriple().isOSAIX()) {
+      DataLayout = "E-m:a-Fi64-i64:64-i128:128-n32:64";
+    } else if ((getTriple().getArch() == llvm::Triple::ppc64le)) {
+      DataLayout = "e-m:e-Fn32-i64:64-i128:128-n32:64";
+    } else {
+      DataLayout = "E-m:e";
+      if (ABI == "elfv2") {
+        DataLayout += "-Fn32";
+      } else {
+        DataLayout += "-Fi64";
+      }
+      DataLayout += "-i64:64-i128:128-n32:64";
+    }
+
+    if (getTriple().isOSAIX() || getTriple().isOSLinux())
+      DataLayout += "-S128-v256:256:256-v512:512:512";
+    resetDataLayout(DataLayout);
+  }
+
   // PPC64 Linux-specific ABI options.
   bool setABI(const std::string &Name) override {
     if (Name == "elfv1" || Name == "elfv2") {
       ABI = Name;
+      calculateDataLayout();
       return true;
     }
     return false;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index e71f10c..7a90c89 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAMXFP8 = true;
     } else if (Feature == "+amx-movrs") {
       HasAMXMOVRS = true;
-    } else if (Feature == "+amx-transpose") {
-      HasAMXTRANSPOSE = true;
     } else if (Feature == "+amx-avx512") {
       HasAMXAVX512 = true;
     } else if (Feature == "+amx-tf32") {
@@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AMX_FP8__");
   if (HasAMXMOVRS)
     Builder.defineMacro("__AMX_MOVRS__");
-  if (HasAMXTRANSPOSE)
-    Builder.defineMacro("__AMX_TRANSPOSE__");
   if (HasAMXAVX512)
     Builder.defineMacro("__AMX_AVX512__");
   if (HasAMXTF32)
@@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("amx-movrs", true)
       .Case("amx-tf32", true)
       .Case("amx-tile", true)
-      .Case("amx-transpose", true)
       .Case("avx", true)
       .Case("avx10.1", true)
       .Case("avx10.2", true)
@@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("amx-movrs", HasAMXMOVRS)
       .Case("amx-tf32", HasAMXTF32)
       .Case("amx-tile", HasAMXTILE)
-      .Case("amx-transpose", HasAMXTRANSPOSE)
       .Case("avx", SSELevel >= AVX)
       .Case("avx10.1", HasAVX10_1)
       .Case("avx10.2", HasAVX10_2)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index be3a473..e7da262 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -160,7 +160,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAMXCOMPLEX = false;
   bool HasAMXFP8 = false;
   bool HasAMXMOVRS = false;
-  bool HasAMXTRANSPOSE = false;
   bool HasAMXAVX512 = false;
   bool HasAMXTF32 = false;
   bool HasSERIALIZE = false;
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 71ff20a..5d5209b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -242,12 +242,19 @@ void CIRGenFunction::LexicalScope::cleanup() {
     }
   };
 
-  if (returnBlock != nullptr) {
-    // Write out the return block, which loads the value from `__retval` and
-    // issues the `cir.return`.
+  // Cleanup are done right before codegen resumes a scope. This is where
+  // objects are destroyed. Process all return blocks.
+  // TODO(cir): Handle returning from a switch statement through a cleanup
+  // block. We can't simply jump to the cleanup block, because the cleanup block
+  // is not part of the case region. Either reemit all cleanups in the return
+  // block or wait for MLIR structured control flow to support early exits.
+  llvm::SmallVector<mlir::Block *> retBlocks;
+  for (mlir::Block *retBlock : localScope->getRetBlocks()) {
     mlir::OpBuilder::InsertionGuard guard(builder);
-    builder.setInsertionPointToEnd(returnBlock);
-    (void)emitReturn(*returnLoc);
+    builder.setInsertionPointToEnd(retBlock);
+    retBlocks.push_back(retBlock);
+    mlir::Location retLoc = localScope->getRetLoc(retBlock);
+    emitReturn(retLoc);
   }
 
   auto insertCleanupAndLeave = [&](mlir::Block *insPt) {
@@ -274,19 +281,22 @@ void CIRGenFunction::LexicalScope::cleanup() {
 
     if (localScope->depth == 0) {
       // Reached the end of the function.
-      if (returnBlock != nullptr) {
-        if (returnBlock->getUses().empty()) {
-          returnBlock->erase();
+      // Special handling only for single return block case
+      if (localScope->getRetBlocks().size() == 1) {
+        mlir::Block *retBlock = localScope->getRetBlocks()[0];
+        mlir::Location retLoc = localScope->getRetLoc(retBlock);
+        if (retBlock->getUses().empty()) {
+          retBlock->erase();
         } else {
           // Thread return block via cleanup block.
           if (cleanupBlock) {
-            for (mlir::BlockOperand &blockUse : returnBlock->getUses()) {
+            for (mlir::BlockOperand &blockUse : retBlock->getUses()) {
               cir::BrOp brOp = mlir::cast<cir::BrOp>(blockUse.getOwner());
               brOp.setSuccessor(cleanupBlock);
             }
           }
 
-          cir::BrOp::create(builder, *returnLoc, returnBlock);
+          cir::BrOp::create(builder, retLoc, retBlock);
           return;
         }
       }
@@ -324,8 +334,10 @@ void CIRGenFunction::LexicalScope::cleanup() {
   bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
   if (!entryBlock && curBlock->empty()) {
     curBlock->erase();
-    if (returnBlock != nullptr && returnBlock->getUses().empty())
-      returnBlock->erase();
+    for (mlir::Block *retBlock : retBlocks) {
+      if (retBlock->getUses().empty())
+        retBlock->erase();
+    }
     return;
   }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index c3fcd1a6..e5cecaa5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1103,44 +1103,69 @@ public:
     // ---
 
   private:
-    // `returnBlock`, `returnLoc`, and all the functions that deal with them
-    // will change and become more complicated when `switch` statements are
-    // upstreamed.  `case` statements within the `switch` are in the same scope
-    // but have their own regions.  Therefore the LexicalScope will need to
-    // keep track of multiple return blocks.
-    mlir::Block *returnBlock = nullptr;
-    std::optional<mlir::Location> returnLoc;
-
-    // See the comment on `getOrCreateRetBlock`.
+    // On switches we need one return block per region, since cases don't
+    // have their own scopes but are distinct regions nonetheless.
+
+    // TODO: This implementation should change once we have support for early
+    //       exits in MLIR structured control flow (llvm-project#161575)
+    llvm::SmallVector<mlir::Block *> retBlocks;
+    llvm::DenseMap<mlir::Block *, mlir::Location> retLocs;
+    llvm::DenseMap<cir::CaseOp, unsigned> retBlockInCaseIndex;
+    std::optional<unsigned> normalRetBlockIndex;
+
+    // There's usually only one ret block per scope, but this needs to be
+    // get or create because of potential unreachable return statements, note
+    // that for those, all source location maps to the first one found.
     mlir::Block *createRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
-      assert(returnBlock == nullptr && "only one return block per scope");
-      // Create the cleanup block but don't hook it up just yet.
+      assert((isa_and_nonnull<cir::CaseOp>(
+                  cgf.builder.getBlock()->getParentOp()) ||
+              retBlocks.size() == 0) &&
+             "only switches can hold more than one ret block");
+
+      // Create the return block but don't hook it up just yet.
       mlir::OpBuilder::InsertionGuard guard(cgf.builder);
-      returnBlock =
-          cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
-      updateRetLoc(returnBlock, loc);
-      return returnBlock;
+      auto *b = cgf.builder.createBlock(cgf.builder.getBlock()->getParent());
+      retBlocks.push_back(b);
+      updateRetLoc(b, loc);
+      return b;
     }
 
     cir::ReturnOp emitReturn(mlir::Location loc);
     void emitImplicitReturn();
 
   public:
-    mlir::Block *getRetBlock() { return returnBlock; }
-    mlir::Location getRetLoc(mlir::Block *b) { return *returnLoc; }
-    void updateRetLoc(mlir::Block *b, mlir::Location loc) { returnLoc = loc; }
-
-    // Create the return block for this scope, or return the existing one.
-    // This get-or-create logic is necessary to handle multiple return
-    // statements within the same scope, which can happen if some of them are
-    // dead code or if there is a `goto` into the middle of the scope.
+    llvm::ArrayRef<mlir::Block *> getRetBlocks() { return retBlocks; }
+    mlir::Location getRetLoc(mlir::Block *b) { return retLocs.at(b); }
+    void updateRetLoc(mlir::Block *b, mlir::Location loc) {
+      retLocs.insert_or_assign(b, loc);
+    }
+
     mlir::Block *getOrCreateRetBlock(CIRGenFunction &cgf, mlir::Location loc) {
-      if (returnBlock == nullptr) {
-        returnBlock = createRetBlock(cgf, loc);
-        return returnBlock;
+      // Check if we're inside a case region
+      if (auto caseOp = mlir::dyn_cast_if_present<cir::CaseOp>(
+              cgf.builder.getBlock()->getParentOp())) {
+        auto iter = retBlockInCaseIndex.find(caseOp);
+        if (iter != retBlockInCaseIndex.end()) {
+          // Reuse existing return block
+          mlir::Block *ret = retBlocks[iter->second];
+          updateRetLoc(ret, loc);
+          return ret;
+        }
+        // Create new return block
+        mlir::Block *ret = createRetBlock(cgf, loc);
+        retBlockInCaseIndex[caseOp] = retBlocks.size() - 1;
+        return ret;
       }
-      updateRetLoc(returnBlock, loc);
-      return returnBlock;
+
+      if (normalRetBlockIndex) {
+        mlir::Block *ret = retBlocks[*normalRetBlockIndex];
+        updateRetLoc(ret, loc);
+        return ret;
+      }
+
+      mlir::Block *ret = createRetBlock(cgf, loc);
+      normalRetBlockIndex = retBlocks.size() - 1;
+      return ret;
     }
 
     mlir::Block *getEntryBlock() { return entryBlock; }
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index b924407..2381b2e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // instruction, but it will create a memset that won't be optimized away.
     return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
   }
-  // Corresponding to intrisics which will return 2 tiles (tile0_tile1).
-  case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
-    Intrinsic::ID IID;
-    switch (BuiltinID) {
-    default:
-      llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
-      break;
-    case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
-      IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
-      break;
-    }
-
-    // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
-    Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
-                                     {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
-
-    auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
-    assert(PtrTy && "arg3 must be of pointer type");
-    QualType PtreeTy = PtrTy->getPointeeType();
-    llvm::Type *TyPtee = ConvertType(PtreeTy);
-
-    // Bitcast amx type (x86_amx) to vector type (256 x i32)
-    // Then store tile0 into DstPtr0
-    Value *T0 = Builder.CreateExtractValue(Call, 0);
-    Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
-                                           {TyPtee}, {T0});
-    Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
-
-    // Then store tile1 into DstPtr1
-    Value *T1 = Builder.CreateExtractValue(Call, 1);
-    Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
-                                           {TyPtee}, {T1});
-    Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
-
-    // Note: Here we escape directly use x86_tilestored64_internal to store
-    // the results due to it can't make sure the Mem written scope. This may
-    // cause shapes reloads after first amx intrinsic, which current amx reg-
-    // ister allocation has no ability to handle it.
-
-    return Store;
-  }
   case X86::BI__ud2:
     // llvm.trap makes a ud2a instruction on x86.
     return EmitTrapCall(Intrinsic::trap);
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 15d0b35..abd049a 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -260,7 +260,8 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
   LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType()
                   ? LangAS::Default
                   : QT->getPointeeType().getAddressSpace();
-  if (AS == LangAS::Default || AS == LangAS::opencl_generic)
+  if (AS == LangAS::Default || AS == LangAS::opencl_generic ||
+      AS == LangAS::opencl_constant)
     return llvm::ConstantPointerNull::get(PT);
 
   auto &Ctx = CGM.getContext();
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 1858912..33fff76 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -162,18 +162,12 @@ set(x86_files
   adxintrin.h
   ammintrin.h
   amxavx512intrin.h
-  amxbf16transposeintrin.h
   amxcomplexintrin.h
-  amxcomplextransposeintrin.h
   amxfp16intrin.h
-  amxfp16transposeintrin.h
   amxfp8intrin.h
   amxintrin.h
   amxmovrsintrin.h
-  amxmovrstransposeintrin.h
   amxtf32intrin.h
-  amxtf32transposeintrin.h
-  amxtransposeintrin.h
   avx10_2_512bf16intrin.h
   avx10_2_512convertintrin.h
   avx10_2_512minmaxintrin.h
diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h
deleted file mode 100644
index 86f09f2..0000000
--- a/clang/lib/Headers/amxbf16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_BF16TRANSPOSEINTRIN_H
-#define __AMX_BF16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-bf16,amx-transpose")))
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-///    tiles \a a and \a b, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in \a dst, and store the
-///    32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO (a.colsb / 4) - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
-///					FP32(b.row[k].bf16[2*n+0])
-///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
-///					FP32(b.row[k].bf16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPBF16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-///    tiles src0 and src1, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in "dst", and store the
-///    32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
-                                        __tile1024i src1) {
-  dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
-                                       src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_BF16TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h
deleted file mode 100644
index 11abaf9..0000000
--- a/clang/lib/Headers/amxcomplextransposeintrin.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H
-#define __AMX_COMPLEXTRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-complex,amx-transpose")))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles \a a and \a b is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-/// Calculates the imaginary part of the result. For each possible combination
-///    of (transposed column of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corresponding complex numbers
-///    (one from \a a and one from \a b). The imaginary part of the \a a element
-///    is multiplied with the real part of the corresponding \a b element, and
-///    the real part of the \a a element is multiplied with the imaginary part
-///    of the corresponding \a b elements. The two accumulated results are
-///    added, and then accumulated into the corresponding row and column of
-///    \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO a.rows - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmimfp16ps(dst, a, b)                                          \
-  __builtin_ia32_ttcmmimfp16ps((dst), (a), (b))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles \a a and \a b is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-/// Calculates the real part of the result. For each possible combination
-///    of (rtransposed colum of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corresponding complex numbers
-///    (one from \a a and one from \a b). The real part of the \a a element is
-///    multiplied with the real part of the corresponding \a b element, and the
-///    negated imaginary part of the \a a element is multiplied with the
-///    imaginary part of the corresponding \a b elements. The two accumulated
-///    results are added, and then accumulated into the corresponding row and
-///    column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO a.rows - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
-///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmrlfp16ps(dst, a, b)                                          \
-  __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-///    containing complex elements and accumulate the results into a packed
-///    single precision tile. Each dword element in input tiles \a a and \a b
-///    is interpreted as a complex number with FP16 real part and FP16 imaginary
-///    part.
-/// Calculates the imaginary part of the result. For each possible combination
-///    of (transposed column of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corresponding complex numbers
-///    (one from \a a and one from \a b). The negated imaginary part of the \a a
-///    element is multiplied with the real part of the corresponding \a b
-///    element, and the real part of the \a a element is multiplied with the
-///    imaginary part of the corresponding \a b elements. The two accumulated
-///    results are added, and then accumulated into the corresponding row and
-///    column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO a.rows - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_conjtcmmimfp16ps(dst, a, b)                                      \
-  __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b))
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from \a a
-///    and writes the result to \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtfp16(__tile dst, __tile a);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR i := 0 TO dst.rows - 1
-///	FOR j := 0 TO (dst.colsb / 4) - 1
-///		tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0]
-///		tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1]
-///	ENDFOR
-///	write_row_and_zero(dst, i, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTFP16 instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The source tile. Max size is 1024 Bytes.
-#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a))
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal(
-    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
-    _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal(
-    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
-    _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal(
-    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
-    _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) {
-  return __builtin_ia32_tconjtfp16_internal(m, n, src);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles src0 and src1 is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-///    This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
-                                __tile1024i src1) {
-  dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col,
-                                          dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles src0 and src1 is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-///    This function calculates the real part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0,
-                                __tile1024i src1) {
-  dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col,
-                                          dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-///    containing complex elements and accumulate the results into a packed
-///    single precision tile. Each dword element in input tiles src0 and src1
-///    is interpreted as a complex number with FP16 real part and FP16 imaginary
-///    part.
-///    This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
-                                    __tile1024i src1) {
-  dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col,
-                                              dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from src and
-///    writes the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src
-///    The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) {
-  dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif // __x86_64__
-#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H
diff --git a/clang/lib/Headers/amxfp16transposeintrin.h b/clang/lib/Headers/amxfp16transposeintrin.h
deleted file mode 100644
index 191f8c6..0000000
--- a/clang/lib/Headers/amxfp16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_FP16TRANSPOSEINTRIN_H
-#define __AMX_FP16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-fp16,amx-transpose")))
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-///    tiles \a a and \a b, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in \a dst, and store the
-///    32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO (a.colsb / 4) - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
-///					FP32(b.row[k].fp16[2*n+0])
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
-///					FP32(b.row[k].fp16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-///    tiles src0 and src1, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in "dst", and store the
-///    32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0,
-                                        __tile1024i src1) {
-  dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
-                                       src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_FP16TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index a7da10d..208aa358 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) {
 /// bytes. Since there is no 2D type in llvm IR, we use vector type to
 /// represent 2D tile and the fixed size is maximum amx tile register size.
 typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64)));
-typedef int _tile1024i_1024a
-    __attribute__((__vector_size__(1024), __aligned__(1024)));
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
 static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
diff --git a/clang/lib/Headers/amxmovrstransposeintrin.h b/clang/lib/Headers/amxmovrstransposeintrin.h
deleted file mode 100644
index 5f48cba..0000000
--- a/clang/lib/Headers/amxmovrstransposeintrin.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
-#define __AMX_MOVRS_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-transpose,amx-movrs")))
-
-#define _tile_2rpntlvwz0rs(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)
-#define _tile_2rpntlvwz0rst1(tdst, base, stride)                               \
-  __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)
-#define _tile_2rpntlvwz1rs(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)
-#define _tile_2rpntlvwz1rst1(tdst, base, stride)                               \
-  __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  // Use __tile1024i_1024a* to escape the alignment check in
-  // clang/test/Headers/x86-intrinsics-headers-clean.cpp
-  __builtin_ia32_t2rpntlvwz0rs_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz0rst1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1rs_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1rst1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
-                                  const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                                &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
-                                  const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                                &dst1->tile, base, stride);
-}
-
-#undef __DEFAULT_FN_ATTRS
-#endif /* __x86_64__ */
-#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/amxtf32transposeintrin.h b/clang/lib/Headers/amxtf32transposeintrin.h
deleted file mode 100644
index e1b90c1..0000000
--- a/clang/lib/Headers/amxtf32transposeintrin.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_TF32TRANSPOSEINTRIN_H
-#define __AMX_TF32TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE                                      \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-tf32,amx-transpose")))
-
-/// \code
-/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \
-///                        constexpr int b);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param srcdst
-/// 	The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// 	The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// 	The 2nd source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) {
-/// 	dword[12:0] := 0
-/// 	dword[31:13] := x[31:13]
-/// 	return dword
-/// }
-///
-/// DEFINE silence_snan_fp32(x[31:0]) {
-/// 	IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0)
-/// 		x.fraction[22] := 1
-/// 	return x
-/// }
-///
-/// elements_dest:= srcdst.colsb/4
-///
-/// FOR m := 0 TO (srcdst.rows-1)
-/// 	tmp[511:0] := 0
-/// 	FOR k := 0 TO (a.rows-1)
-/// 		FOR n := 0 TO (elements_dest-1)
-/// 			a1e := silence_snan_fp32(a.row[k].fp32[m])
-/// 			a2e := silence_snan_fp32(b.row[k].fp32[n])
-/// 			s1e := zero_lower_mantissa_bits_fp32(a1e)
-/// 			s2e := zero_lower_mantissa_bits_fp32(a2e)
-/// 			tmp.fp32[n] += s1e * s2e
-/// 		ENDFOR
-/// 	ENDFOR
-///
-/// 	FOR n := 0 TO (elements_dest-1)
-/// 		tmp.fp32[n] += srcdst.row[m].fp32[n]
-/// 	ENDFOR
-///	write_row_and_zero(srcdst, m, tmp, srcdst.colsb)
-///
-/// ENDFOR
-///
-/// zero_upper_rows(srcdst, srcdst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_tmmultf32ps(srcdst, a, b)                                        \
-  __builtin_ia32_ttmmultf32ps((srcdst), (a), (b))
-
-// dst = m x n (srcdest), src1 = k x m, src2 = k x n
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                           _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do
-/// Matrix Plus with dst. All the calculation is base on float32 but with the
-/// lower 13-bit set to 0.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0,
-                               __tile1024i src1) {
-  dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col,
-                                         dst->tile, src0.tile, src1.tile);
-}
-
-#endif // __x86_64__
-#endif // __AMX_TF32TRANSPOSEINTRIN_H
diff --git a/clang/lib/Headers/amxtransposeintrin.h b/clang/lib/Headers/amxtransposeintrin.h
deleted file mode 100644
index b3fa37d..0000000
--- a/clang/lib/Headers/amxtransposeintrin.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_TRANSPOSEINTRIN_H
-#define __AMX_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TRANSPOSE                                           \
-  __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose")))
-
-#define _tile_2rpntlvwz0(tdst, base, stride)                                   \
-  __builtin_ia32_t2rpntlvwz0(tdst, base, stride)
-#define _tile_2rpntlvwz0t1(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride)
-#define _tile_2rpntlvwz1(tdst, base, stride)                                   \
-  __builtin_ia32_t2rpntlvwz1(tdst, base, stride)
-#define _tile_2rpntlvwz1t1(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride)
-
-/// Transpose 32-bit elements from \a src and write the result to \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_transposed(__tile dst, __tile src);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-/// 	The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// 	The source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-///
-/// FOR i := 0 TO (dst.rows-1)
-/// 	tmp[511:0] := 0
-/// 	FOR j := 0 TO (dst.colsb/4-1)
-/// 		tmp.dword[j] := src.row[j].dword[i]
-/// 	ENDFOR
-/// 	dst.row[i] := tmp
-/// ENDFOR
-///
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src)
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  // Use __tile1024i_1024a* to escape the alignment check in
-  // clang/test/Headers/x86-intrinsics-headers-clean.cpp
-  __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
-                                      (_tile1024i_1024a *)dst1, base,
-                                      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz0t1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
-                                      (_tile1024i_1024a *)dst1, base,
-                                      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1t1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE
-_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) {
-  return __builtin_ia32_ttransposed_internal(m, n, src);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1,
-                              const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                            &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1,
-                              const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                            &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Transpose 32-bit elements from src and write the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src
-///    The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_transposed(__tile1024i *dst, __tile1024i src) {
-  dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile);
-}
-
-#endif /* __x86_64__ */
-#endif /* __AMX_TRANSPOSEINTRIN_H */
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 35f012c..19064a4 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) {
 
 #include <amxfp8intrin.h>
 
-#include <amxtransposeintrin.h>
-
 #include <amxmovrsintrin.h>
 
-#include <amxmovrstransposeintrin.h>
-
 #include <amxavx512intrin.h>
 
 #include <amxtf32intrin.h>
 
-#include <amxtf32transposeintrin.h>
-
-#include <amxbf16transposeintrin.h>
-
-#include <amxfp16transposeintrin.h>
-
-#include <amxcomplextransposeintrin.h>
-
 #include <avx512vp2intersectintrin.h>
 
 #include <avx512vlvp2intersectintrin.h>
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f451787..ad2c2e4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3542,9 +3542,7 @@ bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) {
 
 bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
                                unsigned FirstArg, FormatStringInfo *FSI) {
-  bool IsCXXMember = false;
-  if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
-    IsCXXMember = MD->isInstance();
+  bool HasImplicitThisParam = hasImplicitObjectParameter(D);
   bool IsVariadic = false;
   if (const FunctionType *FnTy = D->getFunctionType())
     IsVariadic = cast<FunctionProtoType>(FnTy)->isVariadic();
@@ -3553,11 +3551,12 @@ bool Sema::getFormatStringInfo(const Decl *D, unsigned FormatIdx,
   else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D))
     IsVariadic = OMD->isVariadic();
 
-  return getFormatStringInfo(FormatIdx, FirstArg, IsCXXMember, IsVariadic, FSI);
+  return getFormatStringInfo(FormatIdx, FirstArg, HasImplicitThisParam,
+                             IsVariadic, FSI);
 }
 
 bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
-                               bool IsCXXMember, bool IsVariadic,
+                               bool HasImplicitThisParam, bool IsVariadic,
                                FormatStringInfo *FSI) {
   if (FirstArg == 0)
     FSI->ArgPassingKind = FAPK_VAList;
@@ -3571,7 +3570,7 @@ bool Sema::getFormatStringInfo(unsigned FormatIdx, unsigned FirstArg,
   // The way the format attribute works in GCC, the implicit this argument
   // of member functions is counted. However, it doesn't appear in our own
   // lists, so decrement format_idx in that case.
-  if (IsCXXMember) {
+  if (HasImplicitThisParam) {
     if(FSI->FormatIdx == 0)
       return false;
     --FSI->FormatIdx;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 964a2a7..a9e7b44 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3785,7 +3785,7 @@ static bool handleFormatAttrCommon(Sema &S, Decl *D, const ParsedAttr &AL,
 
   // In C++ the implicit 'this' function parameter also counts, and they are
   // counted from one.
-  bool HasImplicitThisParam = isInstanceMethod(D);
+  bool HasImplicitThisParam = hasImplicitObjectParameter(D);
   Info->NumArgs = getFunctionOrMethodNumParams(D) + HasImplicitThisParam;
 
   Info->Identifier = AL.getArgAsIdent(0)->getIdentifierInfo();
@@ -3926,7 +3926,7 @@ static void handleCallbackAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  bool HasImplicitThisParam = isInstanceMethod(D);
+  bool HasImplicitThisParam = hasImplicitObjectParameter(D);
   int32_t NumArgs = getFunctionOrMethodNumParams(D);
 
   FunctionDecl *FD = D->getAsFunction();
@@ -4110,7 +4110,7 @@ static void handleLifetimeCaptureByAttr(Sema &S, Decl *D,
 }
 
 void Sema::LazyProcessLifetimeCaptureByParams(FunctionDecl *FD) {
-  bool HasImplicitThisParam = isInstanceMethod(FD);
+  bool HasImplicitThisParam = hasImplicitObjectParameter(FD);
   SmallVector<LifetimeCaptureByAttr *, 1> Attrs;
   for (ParmVarDecl *PVD : FD->parameters())
     if (auto *A = PVD->getAttr<LifetimeCaptureByAttr>())
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 850bcb1..2f61bdd 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -489,14 +489,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_tileloaddrst164:
   case X86::BI__builtin_ia32_tilestored64:
   case X86::BI__builtin_ia32_tilezero:
-  case X86::BI__builtin_ia32_t2rpntlvwz0:
-  case X86::BI__builtin_ia32_t2rpntlvwz0t1:
-  case X86::BI__builtin_ia32_t2rpntlvwz1:
-  case X86::BI__builtin_ia32_t2rpntlvwz1t1:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rst1:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rs:
-  case X86::BI__builtin_ia32_t2rpntlvwz1rst1:
-  case X86::BI__builtin_ia32_t2rpntlvwz0rs:
   case X86::BI__builtin_ia32_tcvtrowps2bf16h:
   case X86::BI__builtin_ia32_tcvtrowps2bf16l:
   case X86::BI__builtin_ia32_tcvtrowps2phh:
@@ -516,17 +508,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_tdpbhf8ps:
   case X86::BI__builtin_ia32_tdphbf8ps:
   case X86::BI__builtin_ia32_tdphf8ps:
-  case X86::BI__builtin_ia32_ttdpbf16ps:
-  case X86::BI__builtin_ia32_ttdpfp16ps:
-  case X86::BI__builtin_ia32_ttcmmimfp16ps:
-  case X86::BI__builtin_ia32_ttcmmrlfp16ps:
-  case X86::BI__builtin_ia32_tconjtcmmimfp16ps:
   case X86::BI__builtin_ia32_tmmultf32ps:
-  case X86::BI__builtin_ia32_ttmmultf32ps:
     return CheckBuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2});
-  case X86::BI__builtin_ia32_ttransposed:
-  case X86::BI__builtin_ia32_tconjtfp16:
-    return CheckBuiltinTileArgumentsRange(TheCall, {0, 1});
   }
 }
 static bool isX86_32Builtin(unsigned BuiltinID) {
diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp
index 171c786..b4bdec1 100644
--- a/clang/lib/Tooling/Transformer/RangeSelector.cpp
+++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp
@@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) {
       // `foo<int>` for which this range will be too short.  Doing so will
       // require subcasing `NamedDecl`, because it doesn't provide virtual
       // access to the \c DeclarationNameInfo.
-      if (tooling::getText(R, *Result.Context) != D->getName())
-        return CharSourceRange();
+      StringRef Text = tooling::getText(R, *Result.Context);
+      if (Text != D->getName())
+        return llvm::make_error<StringError>(
+            llvm::errc::not_supported,
+            "range selected by name(node id=" + ID + "): '" + Text +
+                "' is different from decl name '" + D->getName() + "'");
       return R;
     }
     if (const auto *E = Node.get<DeclRefExpr>()) {
diff --git a/clang/test/CIR/CodeGen/switch.cpp b/clang/test/CIR/CodeGen/switch.cpp
index e13aa8f..3824be0 100644
--- a/clang/test/CIR/CodeGen/switch.cpp
+++ b/clang/test/CIR/CodeGen/switch.cpp
@@ -1183,3 +1183,90 @@ int nested_switch(int a) {
 // OGCG: [[IFEND10]]:
 // OGCG:   br label %[[EPILOG]]
 // OGCG: [[EPILOG]]:
+
+int sw_return_multi_cases(int x) {
+  switch (x) {
+  case 0:
+    return 0;
+  case 1:
+    return 1;
+  case 2:
+    return 2;
+  default:
+    return -1;
+  }
+}
+
+// CIR-LABEL: cir.func{{.*}} @_Z21sw_return_multi_casesi
+// CIR:       cir.switch (%{{.*}} : !s32i) {
+// CIR-NEXT:  cir.case(equal, [#cir.int<0> : !s32i]) {
+// CIR:         %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR:         cir.store{{.*}} %[[ZERO]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+// CIR:         %[[RET0:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:    cir.return %[[RET0]] : !s32i
+// CIR-NEXT:  }
+// CIR-NEXT:  cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR:         %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:         cir.store{{.*}} %[[ONE]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+// CIR:         %[[RET1:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:    cir.return %[[RET1]] : !s32i
+// CIR-NEXT:  }
+// CIR-NEXT:  cir.case(equal, [#cir.int<2> : !s32i]) {
+// CIR:         %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+// CIR:         cir.store{{.*}} %[[TWO]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+// CIR:         %[[RET2:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:    cir.return %[[RET2]] : !s32i
+// CIR-NEXT:  }
+// CIR-NEXT:  cir.case(default, []) {
+// CIR:         %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:         %[[NEG:.*]] = cir.unary(minus, %[[ONE]]) {{.*}} : !s32i, !s32i
+// CIR:         cir.store{{.*}} %[[NEG]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+// CIR:         %[[RETDEF:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:    cir.return %[[RETDEF]] : !s32i
+// CIR-NEXT:  }
+// CIR-NEXT:  cir.yield
+
+// LLVM-LABEL: define{{.*}} i32 @_Z21sw_return_multi_casesi
+// LLVM:   switch i32 %{{.*}}, label %[[DEFAULT:.*]] [
+// LLVM-DAG:   i32 0, label %[[CASE0:.*]]
+// LLVM-DAG:   i32 1, label %[[CASE1:.*]]
+// LLVM-DAG:   i32 2, label %[[CASE2:.*]]
+// LLVM:   ]
+// LLVM: [[CASE0]]:
+// LLVM:   store i32 0, ptr %{{.*}}, align 4
+// LLVM:   %{{.*}} = load i32, ptr %{{.*}}, align 4
+// LLVM:   ret i32 %{{.*}}
+// LLVM: [[CASE1]]:
+// LLVM:   store i32 1, ptr %{{.*}}, align 4
+// LLVM:   %{{.*}} = load i32, ptr %{{.*}}, align 4
+// LLVM:   ret i32 %{{.*}}
+// LLVM: [[CASE2]]:
+// LLVM:   store i32 2, ptr %{{.*}}, align 4
+// LLVM:   %{{.*}} = load i32, ptr %{{.*}}, align 4
+// LLVM:   ret i32 %{{.*}}
+// LLVM: [[DEFAULT]]:
+// LLVM:   store i32 -1, ptr %{{.*}}, align 4
+// LLVM:   %{{.*}} = load i32, ptr %{{.*}}, align 4
+// LLVM:   ret i32 %{{.*}}
+
+// OGCG-LABEL: define{{.*}} i32 @_Z21sw_return_multi_casesi
+// OGCG: entry:
+// OGCG:   %[[RETVAL:.*]] = alloca i32, align 4
+// OGCG:   %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG:   %[[X_VAL:.*]] = load i32, ptr %[[X_ADDR]], align 4
+// OGCG:   switch i32 %[[X_VAL]], label %[[DEFAULT:.*]] [
+// OGCG-DAG:   i32 0, label %[[SW0:.*]]
+// OGCG-DAG:   i32 1, label %[[SW1:.*]]
+// OGCG-DAG:   i32 2, label %[[SW2:.*]]
+// OGCG:   ]
+// OGCG: [[SW0]]:
+// OGCG:   br label %[[RETURN:.*]]
+// OGCG: [[SW1]]:
+// OGCG:   br label %[[RETURN]]
+// OGCG: [[SW2]]:
+// OGCG:   br label %[[RETURN]]
+// OGCG: [[DEFAULT]]:
+// OGCG:   br label %[[RETURN]]
+// OGCG: [[RETURN]]:
+// OGCG:   %[[RETVAL_LOAD:.*]] = load i32, ptr %[[RETVAL]], align 4
+// OGCG:   ret i32 %[[RETVAL_LOAD]]
diff --git a/clang/test/CodeGen/PowerPC/ppc64-abi-override-datalayout.c b/clang/test/CodeGen/PowerPC/ppc64-abi-override-datalayout.c
new file mode 100644
index 0000000..30b85d2
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/ppc64-abi-override-datalayout.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -target-abi elfv2 %s -o - -emit-llvm | FileCheck %s
+
+// REQUIRES: powerpc-registered-target
+
+// Make sure that overriding the ABI to ELFv2 on a target that defaults to
+// ELFv1 changes the data layout:
+
+// CHECK: target datalayout = "E-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose.c b/clang/test/CodeGen/X86/amx_movrs_tranpose.c
deleted file mode 100755
index 192c153..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_tranpose.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// RUN:  %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN:  -target-feature +amx-movrs  -emit-llvm -o - -Wall -Werror -pedantic \
-// RUN:  -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-char buf[2048];
-#define STRIDE 32
-
-// CHECK-LABEL:  define dso_local void @test_tile_2rpntlvwz0rs_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz0rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
-  _tile_2rpntlvwz0rs_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL:  define dso_local void @test_tile_2rpntlvwz0rst1_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz0rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
-  _tile_2rpntlvwz0rst1_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL:  define dso_local void @test_tile_2rpntlvwz1rs_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz1rs_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
-  _tile_2rpntlvwz1rs_internal(row, col0, col1, D0, D1, B, 1);
-}
-
-// CHECK-LABEL:  define dso_local void @test_tile_2rpntlvwz1rst1_internal(
-// CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal(i16 %{{.*}}, i16 %{{.*}}, i16 %{{.*}}, ptr %{{.*}}, i64 %{{.*}})
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 0
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-// CHECK: store <256 x i32> %{{.*}}, ptr %{{.*}}, align 1024
-// CHECK: extractvalue { x86_amx, x86_amx } %{{.*}}, 1
-// CHECK: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %{{.*}})
-void test_tile_2rpntlvwz1rst1_internal(int row, int col0, int col1, void *D0, void *D1, void *B) {
-  _tile_2rpntlvwz1rst1_internal(row, col0, col1, D0, D1, B, 1);
-}
diff --git a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c b/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c
deleted file mode 100755
index b174cc5..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_tranpose_api.c
+++ /dev/null
@@ -1,81 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-movrs  -emit-llvm -o - -Wall -Werror -pedantic \
-// RUN: -target-feature +amx-transpose -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-char buf[2048];
-#define STRIDE 32
-
-void test_tile_2rpntlvwz0rs(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz0rs
-  // CHECK: call void @llvm.x86.t2rpntlvwz0rs(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz0rs(1, A, B);
-}
-
-void test_tile_2rpntlvwz0rst1(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz0rst1
-  // CHECK: call void @llvm.x86.t2rpntlvwz0rst1(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz0rst1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1rs(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz1rs
-  // CHECK: call void @llvm.x86.t2rpntlvwz1rs(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz1rs(1, A, B);
-}
-
-void test_tile_2rpntlvwz1rst1(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz1rst1
-  // CHECK: call void @llvm.x86.t2rpntlvwz1rst1(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz1rst1(1, A, B);
-}
-
-void test__tile_2rpntlvwz0rs(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test__tile_2rpntlvwz0rs
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz0rs(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz0rst1(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test__tile_2rpntlvwz0rst1
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rst1.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz0rst1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz1rs(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test__tile_2rpntlvwz1rs
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rs.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz1rs(&dst0, &dst1, buf, STRIDE);
-}
-
-void test__tile_2rpntlvwz1rst1(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test__tile_2rpntlvwz1rst1
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1rst1.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz1rst1(&dst0, &dst1, buf, STRIDE);
-}
diff --git a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c b/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c
deleted file mode 100755
index 840b52b..0000000
--- a/clang/test/CodeGen/X86/amx_movrs_transpose_errors.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-int8 -target-feature +amx-transpose -target-feature +amx-movrs \
-// RUN: -verify
-
-#include <immintrin.h>
-#include <stddef.h>
-
-void test_tile_2rpntlvwz0rs(const void *A, size_t B) {
-  _tile_2rpntlvwz0rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz0rst1(const void *A, size_t B) {
-  _tile_2rpntlvwz0rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1rs(const void *A, size_t B) {
-  _tile_2rpntlvwz1rs(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1rst1(const void *A, size_t B) {
-  _tile_2rpntlvwz1rst1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32.c b/clang/test/CodeGen/X86/amx_tf32.c
index 661a9df..54ad6bb 100644
--- a/clang/test/CodeGen/X86/amx_tf32.c
+++ b/clang/test/CodeGen/X86/amx_tf32.c
@@ -10,8 +10,3 @@ void test_tile_mmultf32ps(void) {
   _tile_mmultf32ps(1, 2, 3);
 }
 
-void test_tile_tmmultf32ps(void) {
-  // CHECK-LABEL: @test_tile_tmmultf32ps(
-  // CHECK: call void @llvm.x86.ttmmultf32ps(i8 1, i8 2, i8 3)
-  _tile_tmmultf32ps(1, 2, 3);
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32_api.c b/clang/test/CodeGen/X86/amx_tf32_api.c
index 2ac8489..8f574b7 100644
--- a/clang/test/CodeGen/X86/amx_tf32_api.c
+++ b/clang/test/CodeGen/X86/amx_tf32_api.c
@@ -18,10 +18,3 @@ void test_tile_mmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) {
   __tile_mmultf32ps(&c, a, b);
 }
 
-void test_tile_tmmultf32ps(__tile1024i a, __tile1024i b, __tile1024i c) {
-  //CHECK-LABEL: @test_tile_tmmultf32ps
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.ttmmultf32ps.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_tmmultf32ps(&c, a, b);
-}
diff --git a/clang/test/CodeGen/X86/amx_tf32_errors.c b/clang/test/CodeGen/X86/amx_tf32_errors.c
index 4502130..f0fdd06 100644
--- a/clang/test/CodeGen/X86/amx_tf32_errors.c
+++ b/clang/test/CodeGen/X86/amx_tf32_errors.c
@@ -13,11 +13,3 @@ void test_tile_mmultf32ps() {
   _tile_mmultf32ps(1, 3, 3);  // expected-error {{tile arguments must refer to different tiles}}
 }
 
-void test_tile_tmmultf32ps() {
-  _tile_tmmultf32ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
-  _tile_tmmultf32ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-  _tile_tmmultf32ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
-  _tile_tmmultf32ps(1, 1, 3);  // expected-error {{tile arguments must refer to different tiles}}
-  _tile_tmmultf32ps(1, 2, 1);  // expected-error {{tile arguments must refer to different tiles}}
-  _tile_tmmultf32ps(1, 2, 2);  // expected-error {{tile arguments must refer to different tiles}}
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose.c b/clang/test/CodeGen/X86/amx_transpose.c
deleted file mode 100644
index 7e88fd8..0000000
--- a/clang/test/CodeGen/X86/amx_transpose.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-transpose \
-// RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
-// RUN: -target-feature +avx512f -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression| FileCheck %s
-
-#include <immintrin.h>
-#include <stddef.h>
-
-void test_tile_2rpntlvwz0(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz0
-  // CHECK: call void @llvm.x86.t2rpntlvwz0(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz0(1, A, B);
-}
-
-void test_tile_2rpntlvwz0t1(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz0t1
-  // CHECK: call void @llvm.x86.t2rpntlvwz0t1(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz0t1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz1
-  // CHECK: call void @llvm.x86.t2rpntlvwz1(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz1(1, A, B);
-}
-
-void test_tile_2rpntlvwz1t1(const void *A, size_t B) {
-  // CHECK-LABEL: @test_tile_2rpntlvwz1t1
-  // CHECK: call void @llvm.x86.t2rpntlvwz1t1(i8 1, ptr %{{.*}}, i64 %{{.*}})
-  _tile_2rpntlvwz1t1(1, A, B);
-}
-
-void test_tile_transposed(void)
-{
-  // CHECK-LABEL: @test_tile_transposed
-  // CHECK: call void @llvm.x86.ttransposed(i8 1, i8 2)
-  _tile_transposed(1, 2);
-}
-
-void test_tile_tdpbf16ps(void)
-{
-  // CHECK-LABEL: @test_tile_tdpbf16ps
-  // CHECK: call void @llvm.x86.ttdpbf16ps(i8 1, i8 2, i8 3)
-  _tile_tdpbf16ps(1, 2, 3);
-}
-
-void test_tile_tdpfp16ps(void)
-{
-  // CHECK-LABEL: @test_tile_tdpfp16ps
-  // CHECK: call void @llvm.x86.ttdpfp16ps(i8 4, i8 5, i8 6)
-  _tile_tdpfp16ps(4, 5, 6);
-}
-
-void test_tile_tcmmimfp16ps(void) {
-  // CHECK-LABEL: @test_tile_tcmmimfp16ps
-  // CHECK: call void @llvm.x86.ttcmmimfp16ps(i8 1, i8 2, i8 3)
-  _tile_tcmmimfp16ps(1, 2, 3);
-}
-
-void test_tile_tcmmrlfp16ps(void) {
-  // CHECK-LABEL: @test_tile_tcmmrlfp16ps
-  // CHECK: call void @llvm.x86.ttcmmrlfp16ps(i8 1, i8 2, i8 3)
-  _tile_tcmmrlfp16ps(1, 2, 3);
-}
-
-void test_tile_conjtcmmimfp16ps(void) {
-  // CHECK-LABEL: @test_tile_conjtcmmimfp16ps
-  // CHECK: call void @llvm.x86.tconjtcmmimfp16ps(i8 1, i8 2, i8 3)
-  _tile_conjtcmmimfp16ps(1, 2, 3);
-}
-
-void test_tile_conjtfp16(void) {
-  // CHECK-LABEL: @test_tile_conjtfp16
-  // CHECK: call void @llvm.x86.tconjtfp16(i8 1, i8 2)
-  _tile_conjtfp16(1, 2);
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose_api.c b/clang/test/CodeGen/X86/amx_transpose_api.c
deleted file mode 100644
index dc3ef51..0000000
--- a/clang/test/CodeGen/X86/amx_transpose_api.c
+++ /dev/null
@@ -1,114 +0,0 @@
-// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f \
-// RUN: -target-feature +amx-transpose -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
-// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
-
-#include <immintrin.h>
-
-char buf[2048];
-#define STRIDE 32
-
-char buf2[2048];
-
-void test_tile_2rpntlvwz0(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test_tile_2rpntlvwz0
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz0(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz0t1(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test_tile_2rpntlvwz0t1
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz0t1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz1(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test_tile_2rpntlvwz1
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_2rpntlvwz1t1(__tile1024i dst0, __tile1024i dst1) {
-  //CHECK-LABEL: @test_tile_2rpntlvwz1t1
-  //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
-  //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
-  __tile_2rpntlvwz1t1(&dst0, &dst1, buf, STRIDE);
-}
-
-void test_tile_transposed(__tile1024i dst, __tile1024i src) {
-  //CHECK-LABEL: @test_tile_transposed
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.ttransposed.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_transposed(&dst, src);
-}
-
-void test_tile_tdpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
-  //CHECK-LABEL: @test_tile_tdpbf16ps
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.ttdpbf16ps.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_tdpbf16ps(&c, a, b);
-}
-
-void test_tile_tdpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
-  //CHECK-LABEL: @test_tile_tdpfp16ps
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.ttdpfp16ps.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_tdpfp16ps(&c, a, b);
-}
-
-void test_tile_tcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
-  //CHECK-LABEL: @test_tile_tcmmimfp16ps
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.ttcmmimfp16ps.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_tcmmimfp16ps(&c, a, b);
-}
-
-void test_tile_tcmmrlfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
-  //CHECK-LABEL: @test_tile_tcmmrlfp16ps
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.ttcmmrlfp16ps.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_tcmmrlfp16ps(&c, a, b);
-}
-
-void test_tile_conjtcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
-  //CHECK-LABEL: @test_tile_conjtcmmimfp16ps
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_conjtcmmimfp16ps(&c, a, b);
-}
-
-void test_tile_conjtfp16(__tile1024i dst, __tile1024i src) {
-  //CHECK-LABEL: @test_tile_conjtfp16
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
-  //CHECK-DAG: call x86_amx @llvm.x86.tconjtfp16.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
-  __tile_conjtfp16(&dst, src);
-}
diff --git a/clang/test/CodeGen/X86/amx_transpose_errors.c b/clang/test/CodeGen/X86/amx_transpose_errors.c
deleted file mode 100644
index 80368c5..0000000
--- a/clang/test/CodeGen/X86/amx_transpose_errors.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
-// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-transpose \
-// RUN: -target-feature +avx512f -target-feature +amx-fp16 -target-feature +amx-complex -verify
-
-#include <immintrin.h>
-#include <stddef.h>
-
-// Transpose
-void test_tile_2rpntlvwz0(const void *A, size_t B) {
-  _tile_2rpntlvwz0(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz0t1(const void *A, size_t B) {
-  _tile_2rpntlvwz0t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1(const void *A, size_t B) {
-  _tile_2rpntlvwz1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_2rpntlvwz1t1(const void *A, size_t B) {
-  _tile_2rpntlvwz1t1(8, A, B); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_tdpbf16ps()
-{
-  _tile_tdpbf16ps(8, 2, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_tdpbf16ps(1, 8, 3); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_tdpbf16ps(1, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_tdpbf16ps(1, 1, 3);  // expected-error {{tile arguments must refer to different tiles}}
-  _tile_tdpbf16ps(1, 2, 1);  // expected-error {{tile arguments must refer to different tiles}}
-  _tile_tdpbf16ps(1, 2, 2);  // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_tdpfp16ps()
-{
-  _tile_tdpfp16ps(8, 5, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_tdpfp16ps(1, 8, 6); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_tdpfp16ps(1, 5, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_tdpfp16ps(1, 1, 3);  // expected-error {{tile arguments must refer to different tiles}}
-  _tile_tdpfp16ps(1, 2, 1);  // expected-error {{tile arguments must refer to different tiles}}
-  _tile_tdpfp16ps(1, 2, 2);  // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_transposed()
-{
-  _tile_transposed(8, 2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-  _tile_transposed(1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
-}
-
-void test_tile_tcmmimfp16ps() {
-  _tile_tcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
-  _tile_tcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-  _tile_tcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
-  _tile_tcmmimfp16ps(1, 1, 3);  // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_tcmmrlfp16ps() {
-  _tile_tcmmrlfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
-  _tile_tcmmrlfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-  _tile_tcmmrlfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
-  _tile_tcmmrlfp16ps(1, 1, 3);  // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_conjtcmmimfp16ps() {
-  _tile_conjtcmmimfp16ps(16, 2, 3); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
-  _tile_conjtcmmimfp16ps(1, 26, 3); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-  _tile_conjtcmmimfp16ps(1, 2, 36); // expected-error {{argument value 36 is outside the valid range [0, 7]}}
-  _tile_conjtcmmimfp16ps(1, 2, 1);  // expected-error {{tile arguments must refer to different tiles}}
-}
-
-void test_tile_conjtfp16() {
-  _tile_conjtfp16(16, 2); // expected-error {{argument value 16 is outside the valid range [0, 7]}}
-  _tile_conjtfp16(1, 26); // expected-error {{argument value 26 is outside the valid range [0, 7]}}
-}
diff --git a/clang/test/CodeGenCXX/attr-callback.cpp b/clang/test/CodeGenCXX/attr-callback.cpp
index c3456d6c..efa705b 100644
--- a/clang/test/CodeGenCXX/attr-callback.cpp
+++ b/clang/test/CodeGenCXX/attr-callback.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -std=c++23 %s -emit-llvm -o - | FileCheck %s
 
 struct Base {
 
@@ -47,9 +47,30 @@ struct Derived_2 : public Base {
 // CHECK-NOT: !callback
 void Derived_2::virtual_1(void (*callback)(void)) {}
 
+class ExplicitParameterObject {
+  __attribute__((callback(1, 0))) void implicit_this_idx(void (*callback)(ExplicitParameterObject*));
+  __attribute__((callback(1, this))) void implicit_this_identifier(void (*callback)(ExplicitParameterObject*));
+  __attribute__((callback(2, 1))) void explicit_this_idx(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*));
+  __attribute__((callback(2, self))) void explicit_this_identifier(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*));
+};
+
+// CHECK-DAG: define{{.*}} void @_ZN23ExplicitParameterObject17implicit_this_idxEPFvPS_E({{[^!]*!callback}} ![[cid3:[0-9]+]]
+void ExplicitParameterObject::implicit_this_idx(void (*callback)(ExplicitParameterObject*)) {}
+
+// CHECK-DAG: define{{.*}} void @_ZN23ExplicitParameterObject24implicit_this_identifierEPFvPS_E({{[^!]*!callback}} ![[cid3]]
+void ExplicitParameterObject::implicit_this_identifier(void (*callback)(ExplicitParameterObject*)) {}
+
+// CHECK-DAG: define{{.*}} void @_ZNH23ExplicitParameterObject17explicit_this_idxEPS_PFvS0_E({{[^!]*!callback}} ![[cid3]]
+void ExplicitParameterObject::explicit_this_idx(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*)) {}
+
+// CHECK-DAG: define{{.*}} void @_ZNH23ExplicitParameterObject24explicit_this_identifierEPS_PFvS0_E({{[^!]*!callback}} ![[cid3]]
+void ExplicitParameterObject::explicit_this_identifier(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*)) {}
+
 // CHECK-DAG: ![[cid0]] = !{![[cid0b:[0-9]+]]}
 // CHECK-DAG: ![[cid0b]] = !{i64 1, i1 false}
 // CHECK-DAG: ![[cid1]] = !{![[cid1b:[0-9]+]]}
 // CHECK-DAG: ![[cid1b]] = !{i64 2, i1 false}
 // CHECK-DAG: ![[cid2]] = !{![[cid2b:[0-9]+]]}
 // CHECK-DAG: ![[cid2b]] = !{i64 1, i64 0, i64 -1, i64 0, i1 false}
+// CHECK-DAG: ![[cid3]] = !{![[cid3b:[0-9]+]]}
+// CHECK-DAG: ![[cid3b]] = !{i64 1, i64 0, i1 false}
diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
deleted file mode 100644
index d0bcd1f..0000000
--- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ /dev/null
@@ -1,633 +0,0 @@
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -O0 -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -emit-llvm -o - | FileCheck --check-prefix=NOOPT %s
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -fcommon -emit-llvm -o - | FileCheck %s --check-prefix=COMMON
-
-typedef struct {
-  private char *p1;
-  local char *p2;
-  constant char *p3;
-  global char *p4;
-  generic char *p5;
-} StructTy1;
-
-typedef struct {
-  constant char *p3;
-  global char *p4;
-  generic char *p5;
-} StructTy2;
-
-// Test 0 as initializer.
-
-// CHECK: @private_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-private char *private_p = 0;
-
-// CHECK: @local_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-local char *local_p = 0;
-
-// CHECK: @global_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
-global char *global_p = 0;
-
-// CHECK: @constant_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
-constant char *constant_p = 0;
-
-// CHECK: @generic_p ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
-generic char *generic_p = 0;
-
-// Test NULL as initializer.
-
-// CHECK: @private_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-private char *private_p_NULL = NULL;
-
-// CHECK: @local_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-local char *local_p_NULL = NULL;
-
-// CHECK: @global_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
-global char *global_p_NULL = NULL;
-
-// CHECK: @constant_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
-constant char *constant_p_NULL = NULL;
-
-// CHECK: @generic_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
-generic char *generic_p_NULL = NULL;
-
-// Test constant folding of null pointer.
-// A null pointer should be folded to a null pointer in the target address space.
-
-// CHECK: @fold_generic ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
-generic int *fold_generic = (global int*)(generic float*)(private char*)0;
-
-// CHECK: @fold_priv ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr addrspace(1) null to ptr addrspace(5)), align 4
-private short *fold_priv = (private short*)(generic int*)(global void*)0;
-
-// CHECK: @fold_priv_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) inttoptr (i32 9 to ptr addrspace(5)), align 4
-private char *fold_priv_arith = (private char*)0 + 10;
-
-// CHECK: @fold_local_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) inttoptr (i32 9 to ptr addrspace(3)), align 4
-local char *fold_local_arith = (local char*)0 + 10;
-
-// CHECK: @fold_int ={{.*}} local_unnamed_addr addrspace(1) global i32 13, align 4
-int fold_int = (int)(private void*)(generic char*)(global int*)0 + 14;
-
-// CHECK: @fold_int2 ={{.*}} local_unnamed_addr addrspace(1) global i32 12, align 4
-int fold_int2 = (int) ((private void*)0 + 13);
-
-// CHECK: @fold_int3 ={{.*}} local_unnamed_addr addrspace(1) global i32 -1, align 4
-int fold_int3 = (int) ((private int*)0);
-
-// CHECK: @fold_int4 ={{.*}} local_unnamed_addr addrspace(1) global i32 7, align 4
-int fold_int4 = (int) &((private int*)0)[2];
-
-// CHECK: @fold_int5 ={{.*}} local_unnamed_addr addrspace(1) global i32 3, align 4
-int fold_int5 = (int) &((private StructTy1*)0)->p2;
-
-
-// CHECK: @fold_int_local ={{.*}} local_unnamed_addr addrspace(1) global i32 13, align 4
-int fold_int_local = (int)(local void*)(generic char*)(global int*)0 + 14;
-
-// CHECK: @fold_int2_local ={{.*}} local_unnamed_addr addrspace(1) global i32 12, align 4
-int fold_int2_local = (int) ((local void*)0 + 13);
-
-// CHECK: @fold_int3_local ={{.*}} local_unnamed_addr addrspace(1) global i32 -1, align 4
-int fold_int3_local = (int) ((local int*)0);
-
-// CHECK: @fold_int4_local ={{.*}} local_unnamed_addr addrspace(1) global i32 7, align 4
-int fold_int4_local = (int) &((local int*)0)[2];
-
-// CHECK: @fold_int5_local ={{.*}} local_unnamed_addr addrspace(1) global i32 3, align 4
-int fold_int5_local = (int) &((local StructTy1*)0)->p2;
-
-
-// Test static variable initialization.
-
-// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-// NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-// NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global ptr addrspace(5) null, align 4
-// NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-// NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, align 8
-// NOOPT: @test_static_var_private.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
-
-void test_static_var_private(void) {
-  static private char *sp1 = 0;
-  static private char *sp2 = NULL;
-  static private char *sp3;
-  static private char *sp4 = (private char*)((void)0, 0);
-  const int x = 0;
-  static private char *sp5 = (private char*)x;
-  static StructTy1 SS1;
-  static StructTy2 SS2;
-}
-
-// NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-// NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-// NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-// NOOPT: @test_static_var_local.sp4 = internal addrspace(1) global ptr addrspace(3) null, align 4
-// NOOPT: @test_static_var_local.sp5 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-// NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, align 8
-// NOOPT: @test_static_var_local.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
-void test_static_var_local(void) {
-  static local char *sp1 = 0;
-  static local char *sp2 = NULL;
-  static local char *sp3;
-  static local char *sp4 = (local char*)((void)0, 0);
-  const int x = 0;
-  static local char *sp5 = (local char*)x;
-  static StructTy1 SS1;
-  static StructTy2 SS2;
-}
-
-// Test function-scope variable initialization.
-// NOOPT-LABEL: @test_func_scope_var_private(
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp1{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp2{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) null, ptr addrspace(5) %sp3{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp4{{.*}}, align 4
-// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false)
-// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
-void test_func_scope_var_private(void) {
-  private char *sp1 = 0;
-  private char *sp2 = NULL;
-  private char *sp3 = (private char*)((void)0, 0);
-  const int x = 0;
-  private char *sp4 = (private char*)x;
-  StructTy1 SS1 = {0, 0, 0, 0, 0};
-  StructTy2 SS2 = {0, 0, 0};
-}
-
-// Test function-scope variable initialization.
-// NOOPT-LABEL: @test_func_scope_var_local(
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp1{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp2{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) null, ptr addrspace(5) %sp3{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp4{{.*}}, align 4
-// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false)
-// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
-void test_func_scope_var_local(void) {
-  local char *sp1 = 0;
-  local char *sp2 = NULL;
-  local char *sp3 = (local char*)((void)0, 0);
-  const int x = 0;
-  local char *sp4 = (local char*)x;
-  StructTy1 SS1 = {0, 0, 0, 0, 0};
-  StructTy2 SS2 = {0, 0, 0};
-}
-
-
-// Test default initialization of pointers.
-
-// Tentative definition of global variables with non-zero initializer
-// cannot have common linkage since common linkage requires zero initialization
-// and does not have explicit section.
-
-// CHECK: @p1 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-// COMMON: @p1 = weak local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
-private char *p1;
-
-// CHECK: @p2 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-// COMMON: @p2 = weak local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
-local char *p2;
-
-// CHECK: @p3 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
-// COMMON: @p3 = common local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
-constant char *p3;
-
-// CHECK: @p4 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
-// COMMON: @p4 = common local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
-global char *p4;
-
-// CHECK: @p5 ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
-// COMMON: @p5 = common local_unnamed_addr addrspace(1) global ptr null, align 8
-generic char *p5;
-
-// Test default initialization of structure.
-
-// CHECK: @S1 ={{.*}} local_unnamed_addr addrspace(1) global %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, align 8
-StructTy1 S1;
-
-// CHECK: @S2 ={{.*}} local_unnamed_addr addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
-StructTy2 S2;
-
-// Test default initialization of array.
-// CHECK: @A1 ={{.*}} local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }], align 8
-StructTy1 A1[2];
-
-// CHECK: @A2 ={{.*}} local_unnamed_addr addrspace(1) global [2 x %struct.StructTy2] zeroinitializer, align 8
-StructTy2 A2[2];
-
-// Test comparison with 0.
-
-// CHECK-LABEL: cmp_private
-// CHECK: icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
-void cmp_private(private char* p) {
-  if (p != 0)
-    *p = 0;
-}
-
-// CHECK-LABEL: cmp_local
-// CHECK: icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
-void cmp_local(local char* p) {
-  if (p != 0)
-    *p = 0;
-}
-
-// CHECK-LABEL: cmp_global
-// CHECK: icmp eq ptr addrspace(1) %p, null
-void cmp_global(global char* p) {
-  if (p != 0)
-    *p = 0;
-}
-
-// CHECK-LABEL: cmp_constant
-// CHECK: icmp eq ptr addrspace(4) %p, null
-char cmp_constant(constant char* p) {
-  if (p != 0)
-    return *p;
-  else
-    return 0;
-}
-
-// CHECK-LABEL: cmp_generic
-// CHECK: icmp eq ptr %p, null
-void cmp_generic(generic char* p) {
-  if (p != 0)
-    *p = 0;
-}
-
-// Test comparison with NULL.
-
-// CHECK-LABEL: cmp_NULL_private
-// CHECK: icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
-void cmp_NULL_private(private char* p) {
-  if (p != NULL)
-    *p = 0;
-}
-
-// CHECK-LABEL: cmp_NULL_local
-// CHECK: icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
-void cmp_NULL_local(local char* p) {
-  if (p != NULL)
-    *p = 0;
-}
-
-// CHECK-LABEL: cmp_NULL_global
-// CHECK: icmp eq ptr addrspace(1) %p, null
-void cmp_NULL_global(global char* p) {
-  if (p != NULL)
-    *p = 0;
-}
-
-// CHECK-LABEL: cmp_NULL_constant
-// CHECK: icmp eq ptr addrspace(4) %p, null
-char cmp_NULL_constant(constant char* p) {
-  if (p != NULL)
-    return *p;
-  else
-    return 0;
-}
-
-// CHECK-LABEL: cmp_NULL_generic
-// CHECK: icmp eq ptr %p, null
-void cmp_NULL_generic(generic char* p) {
-  if (p != NULL)
-    *p = 0;
-}
-
-// Test storage 0 as null pointer.
-// CHECK-LABEL: test_storage_null_pointer
-// CHECK: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %arg_private
-// CHECK: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %arg_local
-// CHECK: store ptr addrspace(1) null, ptr %arg_global
-// CHECK: store ptr addrspace(4) null, ptr %arg_constant
-// CHECK: store ptr null, ptr %arg_generic
-void test_storage_null_pointer(private char** arg_private,
-                               local char** arg_local,
-                               global char** arg_global,
-                               constant char** arg_constant,
-                               generic char** arg_generic) {
-   *arg_private = 0;
-   *arg_local = 0;
-   *arg_global = 0;
-   *arg_constant = 0;
-   *arg_generic = 0;
-}
-
-// Test storage NULL as null pointer.
-// CHECK-LABEL: test_storage_null_pointer_NULL
-// CHECK: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %arg_private
-// CHECK: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %arg_local
-// CHECK: store ptr addrspace(1) null, ptr %arg_global
-// CHECK: store ptr addrspace(4) null, ptr %arg_constant
-// CHECK: store ptr null, ptr %arg_generic
-void test_storage_null_pointer_NULL(private char** arg_private,
-                                    local char** arg_local,
-                                    global char** arg_global,
-                                    constant char** arg_constant,
-                                    generic char** arg_generic) {
-   *arg_private = NULL;
-   *arg_local = NULL;
-   *arg_global = NULL;
-   *arg_constant = NULL;
-   *arg_generic = NULL;
-}
-
-// Test pass null pointer to function as argument.
-void test_pass_null_pointer_arg_calee(private char* arg_private,
-                                      local char* arg_local,
-                                      global char* arg_global,
-                                      constant char* arg_constant,
-                                      generic char* arg_generic);
-
-// CHECK-LABEL: test_pass_null_pointer_arg
-// CHECK: call void @test_pass_null_pointer_arg_calee(ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) null, ptr addrspace(4) null, ptr null)
-// CHECK: call void @test_pass_null_pointer_arg_calee(ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) null, ptr addrspace(4) null, ptr null)
-void test_pass_null_pointer_arg(void) {
-  test_pass_null_pointer_arg_calee(0, 0, 0, 0, 0);
-  test_pass_null_pointer_arg_calee(NULL, NULL, NULL, NULL, NULL);
-}
-
-// Test cast null pointer to size_t.
-void test_cast_null_pointer_to_sizet_calee(size_t arg_private,
-                                           size_t arg_local,
-                                           size_t arg_global,
-                                           size_t arg_constant,
-                                           size_t arg_generic);
-
-// CHECK-LABEL: test_cast_null_pointer_to_sizet
-// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i64), i64 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i64), i64 0, i64 0, i64 0)
-// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i64), i64 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i64), i64 0, i64 0, i64 0)
-void test_cast_null_pointer_to_sizet(void) {
-  test_cast_null_pointer_to_sizet_calee((size_t)((private char*)0),
-                                        (size_t)((local char*)0),
-                                        (size_t)((global char*)0),
-                                        (size_t)((constant char*)0),
-                                        (size_t)((generic char*)0));
-  test_cast_null_pointer_to_sizet_calee((size_t)((private char*)NULL),
-                                        (size_t)((local char*)NULL),
-                                        (size_t)((global char*)NULL),
-                                        (size_t)((constant char*)0), // NULL cannot be casted to constant pointer since it is defined as a generic pointer
-                                        (size_t)((generic char*)NULL));
-}
-
-// Test comparison between null pointers.
-#define TEST_EQ00(addr1, addr2) int test_eq00_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)0; }
-#define TEST_EQ0N(addr1, addr2) int test_eq0N_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)NULL; }
-#define TEST_EQN0(addr1, addr2) int test_eqN0_##addr1##_##addr2(void) { return (addr1 char*)NULL == (addr2 char*)0; }
-#define TEST_EQNN(addr1, addr2) int test_eqNN_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)NULL; }
-#define TEST_NE00(addr1, addr2) int test_ne00_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)0; }
-#define TEST_NE0N(addr1, addr2) int test_ne0N_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)NULL; }
-#define TEST_NEN0(addr1, addr2) int test_neN0_##addr1##_##addr2(void) { return (addr1 char*)NULL != (addr2 char*)0; }
-#define TEST_NENN(addr1, addr2) int test_neNN_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)NULL; }
-#define TEST(addr1, addr2) \
-        TEST_EQ00(addr1, addr2) \
-        TEST_EQ0N(addr1, addr2) \
-        TEST_EQN0(addr1, addr2) \
-        TEST_EQNN(addr1, addr2) \
-        TEST_NE00(addr1, addr2) \
-        TEST_NE0N(addr1, addr2) \
-        TEST_NEN0(addr1, addr2) \
-        TEST_NENN(addr1, addr2)
-
-// CHECK-LABEL: test_eq00_generic_private
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eq0N_generic_private
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqN0_generic_private
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqNN_generic_private
-// CHECK: ret i32 1
-// CHECK-LABEL: test_ne00_generic_private
-// CHECK: ret i32 0
-// CHECK-LABEL: test_ne0N_generic_private
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neN0_generic_private
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neNN_generic_private
-// CHECK: ret i32 0
-TEST(generic, private)
-
-// CHECK-LABEL: test_eq00_generic_local
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eq0N_generic_local
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqN0_generic_local
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqNN_generic_local
-// CHECK: ret i32 1
-// CHECK-LABEL: test_ne00_generic_local
-// CHECK: ret i32 0
-// CHECK-LABEL: test_ne0N_generic_local
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neN0_generic_local
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neNN_generic_local
-// CHECK: ret i32 0
-TEST(generic, local)
-
-// CHECK-LABEL: test_eq00_generic_global
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eq0N_generic_global
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqN0_generic_global
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqNN_generic_global
-// CHECK: ret i32 1
-// CHECK-LABEL: test_ne00_generic_global
-// CHECK: ret i32 0
-// CHECK-LABEL: test_ne0N_generic_global
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neN0_generic_global
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neNN_generic_global
-// CHECK: ret i32 0
-TEST(generic, global)
-
-// CHECK-LABEL: test_eq00_generic_generic
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eq0N_generic_generic
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqN0_generic_generic
-// CHECK: ret i32 1
-// CHECK-LABEL: test_eqNN_generic_generic
-// CHECK: ret i32 1
-// CHECK-LABEL: test_ne00_generic_generic
-// CHECK: ret i32 0
-// CHECK-LABEL: test_ne0N_generic_generic
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neN0_generic_generic
-// CHECK: ret i32 0
-// CHECK-LABEL: test_neNN_generic_generic
-// CHECK: ret i32 0
-TEST(generic, generic)
-
-// CHECK-LABEL: test_eq00_constant_constant
-// CHECK: ret i32 1
-TEST_EQ00(constant, constant)
-
-// Test cast to bool.
-
-// CHECK-LABEL: cast_bool_private
-// CHECK: icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
-void cast_bool_private(private char* p) {
-  if (p)
-    *p = 0;
-}
-
-// CHECK-LABEL: cast_bool_local
-// CHECK: icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
-void cast_bool_local(local char* p) {
-  if (p)
-    *p = 0;
-}
-
-// CHECK-LABEL: cast_bool_global
-// CHECK: icmp eq ptr addrspace(1) %p, null
-void cast_bool_global(global char* p) {
-  if (p)
-    *p = 0;
-}
-
-// CHECK-LABEL: cast_bool_constant
-// CHECK: icmp eq ptr addrspace(4) %p, null
-char cast_bool_constant(constant char* p) {
-  if (p)
-    return *p;
-  else
-    return 0;
-}
-
-// CHECK-LABEL: cast_bool_generic
-// CHECK: icmp eq ptr %p, null
-void cast_bool_generic(generic char* p) {
-  if (p)
-    *p = 0;
-}
-
-// Test initialize a struct using memset.
-// For large structures which is mostly zero, clang generats llvm.memset for
-// the zero part and store for non-zero members.
-typedef struct {
-  long a, b, c, d;
-  private char *p;
-} StructTy3;
-
-// CHECK-LABEL: test_memset_private
-// CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) noundef align 8 {{.*}}, i8 0, i64 32, i1 false)
-// CHECK: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) %ptr, i32 32
-// CHECK: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) [[GEP]]
-// CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) {{.*}}, i32 36
-// CHECK: store i32 0, ptr addrspace(5) [[GEP1]], align 4
-void test_memset_private(private StructTy3 *ptr) {
-  StructTy3 S3 = {0, 0, 0, 0, 0};
-  *ptr = S3;
-}
-
-// Test casting literal 0 to pointer.
-// A 0 literal casted to pointer should become a null pointer.
-
-// CHECK-LABEL: test_cast_0_to_local_ptr
-// CHECK: ret ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3))
-local int* test_cast_0_to_local_ptr(void) {
-  return (local int*)0;
-}
-
-// CHECK-LABEL: test_cast_0_to_private_ptr
-// CHECK: ret ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5))
-private int* test_cast_0_to_private_ptr(void) {
-  return (private int*)0;
-}
-
-// Test casting non-literal integer with 0 value to pointer.
-// A non-literal integer expression with 0 value is casted to a pointer with
-// zero value.
-
-// CHECK-LABEL: test_cast_int_to_ptr1_private
-// CHECK: ret ptr addrspace(5) null
-private int* test_cast_int_to_ptr1_private(void) {
-  return (private int*)((void)0, 0);
-}
-
-// CHECK-LABEL: test_cast_int_to_ptr1_local
- // CHECK: ret ptr addrspace(3) null
-local int* test_cast_int_to_ptr1_local(void) {
-  return (local int*)((void)0, 0);
-}
-
-// CHECK-LABEL: test_cast_int_to_ptr2
-// CHECK: ret ptr addrspace(5) null
-private int* test_cast_int_to_ptr2(void) {
-  int x = 0;
-  return (private int*)x;
-}
-
-// Test logical operations.
-// CHECK-LABEL: test_not_nullptr
-// CHECK: ret i32 1
-int test_not_nullptr(void) {
-  return !(private char*)NULL;
-}
-
-// CHECK-LABEL: test_and_nullptr
-// CHECK: ret i32 0
-int test_and_nullptr(int a) {
-  return a && ((private char*)NULL);
-}
-
-// CHECK-LABEL: test_not_private_ptr
-// CHECK: %[[lnot:.*]] = icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
-// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
-// CHECK: ret i32 %[[lnot_ext]]
-int test_not_private_ptr(private char* p) {
-  return !p;
-}
-
-// CHECK-LABEL: test_not_local_ptr
-// CHECK: %[[lnot:.*]] = icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
-// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
-// CHECK: ret i32 %[[lnot_ext]]
-int test_not_local_ptr(local char* p) {
-  return !p;
-}
-
-
-// CHECK-LABEL: test_and_ptr
-// CHECK: %[[tobool:.*]] = icmp ne ptr addrspace(5) %p1, addrspacecast (ptr null to ptr addrspace(5))
-// CHECK: %[[tobool1:.*]] = icmp ne ptr addrspace(3) %p2, addrspacecast (ptr null to ptr addrspace(3))
-// CHECK: %[[res:.*]] = select i1 %[[tobool]], i1 %[[tobool1]], i1 false
-// CHECK: %[[land_ext:.*]] = zext i1 %[[res]] to i32
-// CHECK: ret i32 %[[land_ext]]
-int test_and_ptr(private char* p1, local char* p2) {
-  return p1 && p2;
-}
-
-// Test folding of null pointer in function scope.
-// NOOPT-LABEL: test_fold_private
-// NOOPT: call void @test_fold_callee
-// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
-// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
-// NOOPT: call void @test_fold_callee
-// NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i32) to i64
-// NOOPT: %{{.*}} = add nsw i64 %1, %[[SEXT]]
-// NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
-void test_fold_callee(void);
-void test_fold_private(void) {
-  global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
-  long x = glob - (global int*)(generic char*)0;
-  x = x + (int)(test_fold_callee(), (private int*)(generic char*)(global short*)0);
-  x = x - (int)((private int*)0 == (private int*)(generic char*)0);
-}
-
-// NOOPT-LABEL: test_fold_local
-// NOOPT: call void @test_fold_callee
-// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
-// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
-// NOOPT: call void @test_fold_callee
-// NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i32) to i64
-// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, %[[SEXT]]
-// NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
-void test_fold_local(void) {
-  global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
-  long x = glob - (global int*)(generic char*)0;
-  x = x + (int)(test_fold_callee(), (local int*)(generic char*)(global short*)0);
-  x = x - (int)((local int*)0 == (local int*)(generic char*)0);
-}
diff --git a/clang/test/CodeGenOpenCL/nullptr.cl b/clang/test/CodeGenOpenCL/nullptr.cl
new file mode 100644
index 0000000..976e12c
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/nullptr.cl
@@ -0,0 +1,735 @@
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple spir64 -emit-llvm -o - -Wno-void-pointer-to-int-cast -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast | FileCheck %s --check-prefixes=CHECK,SPIR64
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -O0 -cl-std=CL2.0 -include opencl-c.h -triple spir64 -emit-llvm -o - -Wno-void-pointer-to-int-cast -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast | FileCheck --check-prefixes=CHECK-NOOPT,SPIR64-NOOPT %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,AMDGCN
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -O0 -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -emit-llvm -o - | FileCheck --check-prefixes=CHECK-NOOPT,AMDGCN-NOOPT %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s --check-prefix=AMDGCN
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -include opencl-c.h -triple amdgcn -fcommon -emit-llvm -o - | FileCheck %s --check-prefix=AMDGCN-COMMON
+
+typedef struct {
+  private char *p1;
+  local char *p2;
+  constant char *p3;
+  global char *p4;
+  generic char *p5;
+} StructTy1;
+
+typedef struct {
+  constant char *p3;
+  global char *p4;
+  generic char *p5;
+} StructTy2;
+
+// Test 0 as initializer.
+
+// SPIR64: @private_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// AMDGCN: @private_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+private char *private_p = 0;
+
+// SPIR64: @local_p = local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// AMDGCN: @local_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+local char *local_p = 0;
+
+// SPIR64: @global_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), align 8
+// AMDGCN: @global_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
+global char *global_p = 0;
+
+// SPIR64: @constant_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(2) null, align 8
+// AMDGCN: @constant_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+constant char *constant_p = 0;
+
+// SPIR64: @generic_p ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+// AMDGCN: @generic_p ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
+generic char *generic_p = 0;
+
+// Test NULL as initializer.
+
+// SPIR64: @private_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// AMDGCN: @private_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+private char *private_p_NULL = NULL;
+
+// SPIR64: @local_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// AMDGCN: @local_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+local char *local_p_NULL = NULL;
+
+// SPIR64: @global_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), align 8
+// AMDGCN: @global_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
+global char *global_p_NULL = NULL;
+
+// SPIR64: @constant_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(2) null, align 8
+// AMDGCN: @constant_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+constant char *constant_p_NULL = NULL;
+
+// SPIR64: @generic_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+// AMDGCN: @generic_p_NULL ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
+generic char *generic_p_NULL = NULL;
+
+// Test constant folding of null pointer.
+// A null pointer should be folded to a null pointer in the target address space.
+
+// SPIR64: @fold_generic ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+// AMDGCN: @fold_generic ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
+generic int *fold_generic = (global int*)(generic float*)(private char*)0;
+
+// SPIR64: @fold_priv ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// AMDGCN: @fold_priv ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr addrspace(1) null to ptr addrspace(5)), align 4
+private short *fold_priv = (private short*)(generic int*)(global void*)0;
+
+// SPIR64: @fold_priv_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr inttoptr (i64 10 to ptr), align 8
+// AMDGCN: @fold_priv_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) inttoptr (i32 9 to ptr addrspace(5)), align 4
+private char *fold_priv_arith = (private char*)0 + 10;
+
+// SPIR64: @fold_local_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) inttoptr (i64 10 to ptr addrspace(3)), align 8
+// AMDGCN: @fold_local_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) inttoptr (i32 9 to ptr addrspace(3)), align 4
+local char *fold_local_arith = (local char*)0 + 10;
+
+// SPIR64: @fold_int ={{.*}} local_unnamed_addr addrspace(1) global i32 14, align 4
+// AMDGCN: @fold_int ={{.*}} local_unnamed_addr addrspace(1) global i32 13, align 4
+int fold_int = (int)(private void*)(generic char*)(global int*)0 + 14;
+
+// SPIR64: @fold_int2 ={{.*}} local_unnamed_addr addrspace(1) global i32 13, align 4
+// AMDGCN: @fold_int2 ={{.*}} local_unnamed_addr addrspace(1) global i32 12, align 4
+int fold_int2 = (int) ((private void*)0 + 13);
+
+// SPIR64: @fold_int3 ={{.*}} local_unnamed_addr addrspace(1) global i32 0, align 4
+// AMDGCN: @fold_int3 ={{.*}} local_unnamed_addr addrspace(1) global i32 -1, align 4
+int fold_int3 = (int) ((private int*)0);
+
+// SPIR64: @fold_int4 ={{.*}} local_unnamed_addr addrspace(1) global i32 8, align 4
+// AMDGCN: @fold_int4 ={{.*}} local_unnamed_addr addrspace(1) global i32 7, align 4
+int fold_int4 = (int) &((private int*)0)[2];
+
+// SPIR64: @fold_int5 ={{.*}} local_unnamed_addr addrspace(1) global i32 8, align 4
+// AMDGCN: @fold_int5 ={{.*}} local_unnamed_addr addrspace(1) global i32 3, align 4
+int fold_int5 = (int) &((private StructTy1*)0)->p2;
+
+// SPIR64: @fold_int_local ={{.*}} local_unnamed_addr addrspace(1) global i32 14, align 4
+// AMDGCN: @fold_int_local = local_unnamed_addr addrspace(1) global i32 13, align 4
+int fold_int_local = (int)(local void*)(generic char*)(global int*)0 + 14;
+
+// SPIR64: @fold_int2_local ={{.*}} local_unnamed_addr addrspace(1) global i32 13, align 4
+// AMDGCN: @fold_int2_local ={{.*}} local_unnamed_addr addrspace(1) global i32 12, align 4
+int fold_int2_local = (int) ((local void*)0 + 13);
+
+// SPIR64: @fold_int3_local ={{.*}} local_unnamed_addr addrspace(1) global i32 0, align 4
+// AMDGCN: @fold_int3_local ={{.*}} local_unnamed_addr addrspace(1) global i32 -1, align 4
+int fold_int3_local = (int) ((local int*)0);
+
+// SPIR64: @fold_int4_local ={{.*}} local_unnamed_addr addrspace(1) global i32 8, align 4
+// AMDGCN: @fold_int4_local ={{.*}} local_unnamed_addr addrspace(1) global i32 7, align 4
+int fold_int4_local = (int) &((local int*)0)[2];
+
+// SPIR64: @fold_int5_local ={{.*}} local_unnamed_addr addrspace(1) global i32 8, align 4
+// AMDGCN: @fold_int5_local ={{.*}} local_unnamed_addr addrspace(1) global i32 3, align 4
+int fold_int5_local = (int) &((local StructTy1*)0)->p2;
+
+
+// Test static variable initialization.
+
+// SPIR64-NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// SPIR64-NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// SPIR64-NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// SPIR64-NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// SPIR64-NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// SPIR64-NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 zeroinitializer, align 8
+// AMDGCN-NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+// AMDGCN-NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+// AMDGCN-NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+// AMDGCN-NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global ptr addrspace(5) null, align 4
+// AMDGCN-NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+// AMDGCN-NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, align 8
+// CHECK-NOOPT: @test_static_var_private.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
+
+void test_static_var_private(void) {
+  static private char *sp1 = 0;
+  static private char *sp2 = NULL;
+  static private char *sp3;
+  static private char *sp4 = (private char*)((void)0, 0);
+  const int x = 0;
+  static private char *sp5 = (private char*)x;
+  static StructTy1 SS1;
+  static StructTy2 SS2;
+}
+
+// SPIR64-NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// SPIR64-NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// SPIR64-NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// SPIR64-NOOPT: @test_static_var_local.sp4 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// SPIR64-NOOPT: @test_static_var_local.sp5 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// SPIR64-NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 zeroinitializer, align 8
+// AMDGCN-NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+// AMDGCN-NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+// AMDGCN-NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+// AMDGCN-NOOPT: @test_static_var_local.sp4 = internal addrspace(1) global ptr addrspace(3) null, align 4
+// AMDGCN-NOOPT: @test_static_var_local.sp5 = internal addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+// AMDGCN-NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, align 8
+// CHECK-NOOPT: @test_static_var_local.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
+void test_static_var_local(void) {
+  static local char *sp1 = 0;
+  static local char *sp2 = NULL;
+  static local char *sp3;
+  static local char *sp4 = (local char*)((void)0, 0);
+  const int x = 0;
+  static local char *sp5 = (local char*)x;
+  static StructTy1 SS1;
+  static StructTy2 SS2;
+}
+
+// Test function-scope variable initialization.
+// CHECK-NOOPT-LABEL: @test_func_scope_var_private(
+// SPIR64-NOOPT: store ptr addrspacecast (ptr addrspace(4) null to ptr), ptr %sp1{{.*}}, align 8
+// SPIR64-NOOPT: store ptr addrspacecast (ptr addrspace(4) null to ptr), ptr %sp2{{.*}}, align 8
+// SPIR64-NOOPT: store ptr null, ptr %sp3{{.*}}, align 8
+// SPIR64-NOOPT: store ptr addrspacecast (ptr addrspace(4) null to ptr), ptr %sp4{{.*}}, align 8
+// SPIR64-NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS1{{.*}}, i8 0, i64 40, i1 false)
+// SPIR64-NOOPT: call void @llvm.memcpy.p0.p2.i64(ptr align 8 %SS2{{.*}}, ptr addrspace(2) align 8 @__const.test_func_scope_var_private.SS2, i64 24, i1 false)
+// AMDGCN-NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp1{{.*}}, align 4
+// AMDGCN-NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp2{{.*}}, align 4
+// AMDGCN-NOOPT: store ptr addrspace(5) null, ptr addrspace(5) %sp3{{.*}}, align 4
+// AMDGCN-NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp4{{.*}}, align 4
+// AMDGCN-NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false)
+// AMDGCN-NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
+void test_func_scope_var_private(void) {
+  private char *sp1 = 0;
+  private char *sp2 = NULL;
+  private char *sp3 = (private char*)((void)0, 0);
+  const int x = 0;
+  private char *sp4 = (private char*)x;
+  StructTy1 SS1 = {0, 0, 0, 0, 0};
+  StructTy2 SS2 = {0, 0, 0};
+}
+
+// Test function-scope variable initialization.
+// CHECK-NOOPT-LABEL: @test_func_scope_var_local(
+// SPIR64-NOOPT: store ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr %sp1{{.*}}, align 8
+// SPIR64-NOOPT: store ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr %sp2{{.*}}, align 8
+// SPIR64-NOOPT: store ptr addrspace(3) null, ptr %sp3{{.*}}, align 8
+// SPIR64-NOOPT: store ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr %sp4{{.*}}, align 8
+// SPIR64-NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS1{{.*}}, i8 0, i64 40, i1 false)
+// SPIR64-NOOPT: call void @llvm.memcpy.p0.p2.i64(ptr align 8 %SS2{{.*}}, ptr addrspace(2) align 8 @__const.test_func_scope_var_local.SS2, i64 24, i1 false)
+// AMDGCN-NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp1{{.*}}, align 4
+// AMDGCN-NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp2{{.*}}, align 4
+// AMDGCN-NOOPT: store ptr addrspace(3) null, ptr addrspace(5) %sp3{{.*}}, align 4
+// AMDGCN-NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp4{{.*}}, align 4
+// AMDGCN-NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false)
+// AMDGCN-NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
+void test_func_scope_var_local(void) {
+  local char *sp1 = 0;
+  local char *sp2 = NULL;
+  local char *sp3 = (local char*)((void)0, 0);
+  const int x = 0;
+  local char *sp4 = (local char*)x;
+  StructTy1 SS1 = {0, 0, 0, 0, 0};
+  StructTy2 SS2 = {0, 0, 0};
+}
+
+
+// Test default initialization of pointers.
+
+// Tentative definition of global variables with non-zero initializer
+// cannot have common linkage since common linkage requires zero initialization
+// and does not have explicit section.
+
+// SPIR64: @p1 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspacecast (ptr addrspace(4) null to ptr), align 8
+// AMDGCN: @p1 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+// AMDGCN-COMMON: @p1 = weak local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), align 4
+private char *p1;
+
+// SPIR64: @p2 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 8
+// AMDGCN: @p2 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+// AMDGCN-COMMON: @p2 = weak local_unnamed_addr addrspace(1) global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 4
+local char *p2;
+
+// SPIR64: @p3 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(2) null, align 8
+// AMDGCN: @p3 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+// AMDGCN-COMMON: @p3 = common local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+constant char *p3;
+
+// SPIR64: @p4 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), align 8
+// AMDGCN: @p4 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
+// AMDGCN-COMMON: @p4 = common local_unnamed_addr addrspace(1) global ptr addrspace(1) null, align 8
+global char *p4;
+
+// SPIR64: @p5 ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(4) null, align 8
+// AMDGCN: @p5 ={{.*}} local_unnamed_addr addrspace(1) global ptr null, align 8
+// AMDGCN-COMMON: @p5 = common local_unnamed_addr addrspace(1) global ptr null, align 8
+generic char *p5;
+
+// Test default initialization of structure.
+
+// SPIR64: @S1 ={{.*}} local_unnamed_addr addrspace(1) global %struct.StructTy1 zeroinitializer, align 8
+// AMDGCN: @S1 ={{.*}} local_unnamed_addr addrspace(1) global %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, align 8
+StructTy1 S1;
+
+// CHECK: @S2 ={{.*}} local_unnamed_addr addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
+StructTy2 S2;
+
+// Test default initialization of array.
+// SPIR64: @A1 ={{.*}} local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] zeroinitializer, align 8
+// AMDGCN: @A1 ={{.*}} local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }, %struct.StructTy1 { ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(4) null, ptr addrspace(1) null, ptr null }], align 8
+StructTy1 A1[2];
+
+// CHECK: @A2 ={{.*}} local_unnamed_addr addrspace(1) global [2 x %struct.StructTy2] zeroinitializer, align 8
+StructTy2 A2[2];
+
+// Test comparison with 0.
+
+// CHECK-LABEL: cmp_private
+// SPIR64: icmp eq ptr %p, addrspacecast (ptr addrspace(4) null to ptr)
+// AMDGCN: icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
+void cmp_private(private char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_local
+// SPIR64: icmp eq ptr addrspace(3) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
+// AMDGCN: icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
+void cmp_local(local char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_global
+// SPIR64: icmp eq ptr addrspace(1) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(1))
+// AMDGCN: icmp eq ptr addrspace(1) %p, null
+void cmp_global(global char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_constant
+// SPIR64: icmp eq ptr addrspace(2) %p, null
+// AMDGCN: icmp eq ptr addrspace(4) %p, null
+char cmp_constant(constant char* p) {
+  if (p != 0)
+    return *p;
+  else
+    return 0;
+}
+
+// CHECK-LABEL: cmp_generic
+// SPIR64: icmp eq ptr addrspace(4) %p, null
+// AMDGCN: icmp eq ptr %p, null
+void cmp_generic(generic char* p) {
+  if (p != 0)
+    *p = 0;
+}
+
+// Test comparison with NULL.
+
+// CHECK-LABEL: cmp_NULL_private
+// SPIR64: icmp eq ptr %p, addrspacecast (ptr addrspace(4) null to ptr)
+// AMDGCN: icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
+void cmp_NULL_private(private char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_NULL_local
+// SPIR64: icmp eq ptr addrspace(3) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
+// AMDGCN: icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
+void cmp_NULL_local(local char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_NULL_global
+// SPIR64: icmp eq ptr addrspace(1) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(1))
+// AMDGCN: icmp eq ptr addrspace(1) %p, null
+void cmp_NULL_global(global char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// CHECK-LABEL: cmp_NULL_constant
+// SPIR64: icmp eq ptr addrspace(2) %p, null
+// AMDGCN: icmp eq ptr addrspace(4) %p, null
+char cmp_NULL_constant(constant char* p) {
+  if (p != NULL)
+    return *p;
+  else
+    return 0;
+}
+
+// CHECK-LABEL: cmp_NULL_generic
+// SPIR64: icmp eq ptr addrspace(4) %p, null
+// AMDGCN: icmp eq ptr %p, null
+void cmp_NULL_generic(generic char* p) {
+  if (p != NULL)
+    *p = 0;
+}
+
+// Test storage 0 as null pointer.
+// CHECK-LABEL: test_storage_null_pointer
+// SPIR64: store ptr addrspacecast (ptr addrspace(4) null to ptr), ptr addrspace(4) %arg_private
+// SPIR64: store ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr addrspace(4) %arg_local
+// SPIR64: store ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), ptr addrspace(4) %arg_global
+// SPIR64: store ptr addrspace(2) null, ptr addrspace(4) %arg_constant
+// SPIR64: store ptr addrspace(4) null, ptr addrspace(4) %arg_generic
+// AMDGCN: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %arg_private
+// AMDGCN: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %arg_local
+// AMDGCN: store ptr addrspace(1) null, ptr %arg_global
+// AMDGCN: store ptr addrspace(4) null, ptr %arg_constant
+// AMDGCN: store ptr null, ptr %arg_generic
+void test_storage_null_pointer(private char** arg_private,
+                               local char** arg_local,
+                               global char** arg_global,
+                               constant char** arg_constant,
+                               generic char** arg_generic) {
+   *arg_private = 0;
+   *arg_local = 0;
+   *arg_global = 0;
+   *arg_constant = 0;
+   *arg_generic = 0;
+}
+
+// Test storage NULL as null pointer.
+// CHECK-LABEL: test_storage_null_pointer_NULL
+// SPIR64: store ptr addrspacecast (ptr addrspace(4) null to ptr), ptr addrspace(4) %arg_private
+// SPIR64: store ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr addrspace(4) %arg_local
+// SPIR64: store ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), ptr addrspace(4) %arg_global
+// SPIR64: store ptr addrspace(2) null, ptr addrspace(4) %arg_constant
+// SPIR64: store ptr addrspace(4) null, ptr addrspace(4) %arg_generic
+// AMDGCN: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %arg_private
+// AMDGCN: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %arg_local
+// AMDGCN: store ptr addrspace(1) null, ptr %arg_global
+// AMDGCN: store ptr addrspace(4) null, ptr %arg_constant
+// AMDGCN: store ptr null, ptr %arg_generic
+void test_storage_null_pointer_NULL(private char** arg_private,
+                                    local char** arg_local,
+                                    global char** arg_global,
+                                    constant char** arg_constant,
+                                    generic char** arg_generic) {
+   *arg_private = NULL;
+   *arg_local = NULL;
+   *arg_global = NULL;
+   *arg_constant = NULL;
+   *arg_generic = NULL;
+}
+
+// Test pass null pointer to function as argument.
+void test_pass_null_pointer_arg_calee(private char* arg_private,
+                                      local char* arg_local,
+                                      global char* arg_global,
+                                      constant char* arg_constant,
+                                      generic char* arg_generic);
+
+// CHECK-LABEL: test_pass_null_pointer_arg
+// SPIR64: call spir_func void @test_pass_null_pointer_arg_calee(ptr addrspacecast (ptr addrspace(4) null to ptr), ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), ptr addrspace(2) null, ptr addrspace(4) null)
+// SPIR64: call spir_func void @test_pass_null_pointer_arg_calee(ptr addrspacecast (ptr addrspace(4) null to ptr), ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), ptr addrspace(2) null, ptr addrspace(4) null)
+// AMDGCN: call void @test_pass_null_pointer_arg_calee(ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) null, ptr addrspace(4) null, ptr null)
+// AMDGCN: call void @test_pass_null_pointer_arg_calee(ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) null, ptr addrspace(4) null, ptr null)
+void test_pass_null_pointer_arg(void) {
+  test_pass_null_pointer_arg_calee(0, 0, 0, 0, 0);
+  test_pass_null_pointer_arg_calee(NULL, NULL, NULL, NULL, NULL);
+}
+
+// Test cast null pointer to size_t.
+void test_cast_null_pointer_to_sizet_calee(size_t arg_private,
+                                           size_t arg_local,
+                                           size_t arg_global,
+                                           size_t arg_constant,
+                                           size_t arg_generic);
+
+// CHECK-LABEL: test_cast_null_pointer_to_sizet
+// SPIR64: call spir_func void @test_cast_null_pointer_to_sizet_calee(i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64), i64 ptrtoint (ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)) to i64), i64 ptrtoint (ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)) to i64), i64 0, i64 0)
+// SPIR64: call spir_func void @test_cast_null_pointer_to_sizet_calee(i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64), i64 ptrtoint (ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)) to i64), i64 ptrtoint (ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)) to i64), i64 0, i64 0)
+// AMDGCN: call void @test_cast_null_pointer_to_sizet_calee(i64 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i64), i64 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i64), i64 0, i64 0, i64 0)
+// AMDGCN: call void @test_cast_null_pointer_to_sizet_calee(i64 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i64), i64 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i64), i64 0, i64 0, i64 0)
+void test_cast_null_pointer_to_sizet(void) {
+  test_cast_null_pointer_to_sizet_calee((size_t)((private char*)0),
+                                        (size_t)((local char*)0),
+                                        (size_t)((global char*)0),
+                                        (size_t)((constant char*)0),
+                                        (size_t)((generic char*)0));
+  test_cast_null_pointer_to_sizet_calee((size_t)((private char*)NULL),
+                                        (size_t)((local char*)NULL),
+                                        (size_t)((global char*)NULL),
+                                        (size_t)((constant char*)0), // NULL cannot be casted to constant pointer since it is defined as a generic pointer
+                                        (size_t)((generic char*)NULL));
+}
+
+// Test comparison between null pointers.
+#define TEST_EQ00(addr1, addr2) int test_eq00_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)0; }
+#define TEST_EQ0N(addr1, addr2) int test_eq0N_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)NULL; }
+#define TEST_EQN0(addr1, addr2) int test_eqN0_##addr1##_##addr2(void) { return (addr1 char*)NULL == (addr2 char*)0; }
+#define TEST_EQNN(addr1, addr2) int test_eqNN_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)NULL; }
+#define TEST_NE00(addr1, addr2) int test_ne00_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)0; }
+#define TEST_NE0N(addr1, addr2) int test_ne0N_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)NULL; }
+#define TEST_NEN0(addr1, addr2) int test_neN0_##addr1##_##addr2(void) { return (addr1 char*)NULL != (addr2 char*)0; }
+#define TEST_NENN(addr1, addr2) int test_neNN_##addr1##_##addr2(void) { return (addr1 char*)0 != (addr2 char*)NULL; }
+#define TEST(addr1, addr2) \
+        TEST_EQ00(addr1, addr2) \
+        TEST_EQ0N(addr1, addr2) \
+        TEST_EQN0(addr1, addr2) \
+        TEST_EQNN(addr1, addr2) \
+        TEST_NE00(addr1, addr2) \
+        TEST_NE0N(addr1, addr2) \
+        TEST_NEN0(addr1, addr2) \
+        TEST_NENN(addr1, addr2)
+
+// CHECK-LABEL: test_eq00_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_private
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_private
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_private
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_private
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_private
+// CHECK: ret i32 0
+TEST(generic, private)
+
+// CHECK-LABEL: test_eq00_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_local
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_local
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_local
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_local
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_local
+// CHECK: ret i32 0
+TEST(generic, local)
+
+// CHECK-LABEL: test_eq00_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_global
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_global
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_global
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_global
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_global
+// CHECK: ret i32 0
+TEST(generic, global)
+
+// CHECK-LABEL: test_eq00_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eq0N_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqN0_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_eqNN_generic_generic
+// CHECK: ret i32 1
+// CHECK-LABEL: test_ne00_generic_generic
+// CHECK: ret i32 0
+// CHECK-LABEL: test_ne0N_generic_generic
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neN0_generic_generic
+// CHECK: ret i32 0
+// CHECK-LABEL: test_neNN_generic_generic
+// CHECK: ret i32 0
+TEST(generic, generic)
+
+// CHECK-LABEL: test_eq00_constant_constant
+// CHECK: ret i32 1
+TEST_EQ00(constant, constant)
+
+// Test cast to bool.
+
+// CHECK-LABEL: cast_bool_private
+// SPIR64: icmp eq ptr %p, addrspacecast (ptr addrspace(4) null to ptr)
+// AMDGCN: icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
+void cast_bool_private(private char* p) {
+  if (p)
+    *p = 0;
+}
+
+// CHECK-LABEL: cast_bool_local
+// SPIR64: icmp eq ptr addrspace(3) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
+// AMDGCN: icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
+void cast_bool_local(local char* p) {
+  if (p)
+    *p = 0;
+}
+
+// CHECK-LABEL: cast_bool_global
+// SPIR64: icmp eq ptr addrspace(1) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(1))
+// AMDGCN: icmp eq ptr addrspace(1) %p, null
+void cast_bool_global(global char* p) {
+  if (p)
+    *p = 0;
+}
+
+// CHECK-LABEL: cast_bool_constant
+// SPIR64: icmp eq ptr addrspace(2) %p, null
+// AMDGCN: icmp eq ptr addrspace(4) %p, null
+char cast_bool_constant(constant char* p) {
+  if (p)
+    return *p;
+  else
+    return 0;
+}
+
+// CHECK-LABEL: cast_bool_generic
+// SPIR64: icmp eq ptr addrspace(4) %p, null
+// AMDGCN: icmp eq ptr %p, null
+void cast_bool_generic(generic char* p) {
+  if (p)
+    *p = 0;
+}
+
+// Test initialize a struct using memset.
+// For large structures which is mostly zero, clang generats llvm.memset for
+// the zero part and store for non-zero members.
+typedef struct {
+  long a, b, c, d;
+  private char *p;
+} StructTy3;
+
+// CHECK-LABEL: test_memset_private
+// SPIR64: call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %ptr, i8 0, i64 32, i1 false)
+// SPIR64: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr %ptr, i64 32
+// SPIR64: store ptr addrspacecast (ptr addrspace(4) null to ptr), ptr [[GEP]], align 8
+// AMDGCN: call void @llvm.memset.p5.i64(ptr addrspace(5) noundef align 8 {{.*}}, i8 0, i64 32, i1 false)
+// AMDGCN: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) %ptr, i32 32
+// AMDGCN: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) [[GEP]]
+// AMDGCN: [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) {{.*}}, i32 36
+// AMDGCN: store i32 0, ptr addrspace(5) [[GEP1]], align 4
+void test_memset_private(private StructTy3 *ptr) {
+  StructTy3 S3 = {0, 0, 0, 0, 0};
+  *ptr = S3;
+}
+
+// Test casting literal 0 to pointer.
+// A 0 literal casted to pointer should become a null pointer.
+
+// CHECK-LABEL: test_cast_0_to_local_ptr
+// SPIR64: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
+// AMDGCN: ret ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3))
+local int* test_cast_0_to_local_ptr(void) {
+  return (local int*)0;
+}
+
+// CHECK-LABEL: test_cast_0_to_private_ptr
+// SPIR64: ptr addrspacecast (ptr addrspace(4) null to ptr)
+// AMDGCN: ret ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5))
+private int* test_cast_0_to_private_ptr(void) {
+  return (private int*)0;
+}
+
+// Test casting non-literal integer with 0 value to pointer.
+// A non-literal integer expression with 0 value is casted to a pointer with
+// zero value.
+
+// CHECK-LABEL: test_cast_int_to_ptr1_private
+// SPIR64: ret ptr null
+// AMDGCN: ret ptr addrspace(5) null
+private int* test_cast_int_to_ptr1_private(void) {
+  return (private int*)((void)0, 0);
+}
+
+// CHECK-LABEL: test_cast_int_to_ptr1_local
+// CHECK: ret ptr addrspace(3) null
+local int* test_cast_int_to_ptr1_local(void) {
+  return (local int*)((void)0, 0);
+}
+
+// CHECK-LABEL: test_cast_int_to_ptr2
+// SPIR64: ret ptr null
+// AMDGCN: ret ptr addrspace(5) null
+private int* test_cast_int_to_ptr2(void) {
+  int x = 0;
+  return (private int*)x;
+}
+
+// Test logical operations.
+// CHECK-LABEL: test_not_nullptr
+// CHECK: ret i32 1
+int test_not_nullptr(void) {
+  return !(private char*)NULL;
+}
+
+// CHECK-LABEL: test_and_nullptr
+// CHECK: ret i32 0
+int test_and_nullptr(int a) {
+  return a && ((private char*)NULL);
+}
+
+// CHECK-LABEL: test_not_private_ptr
+// SPIR64: %[[lnot:.*]] = icmp eq ptr %p, addrspacecast (ptr addrspace(4) null to ptr)
+// AMDGCN: %[[lnot:.*]] = icmp eq ptr addrspace(5) %p, addrspacecast (ptr null to ptr addrspace(5))
+// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
+// CHECK: ret i32 %[[lnot_ext]]
+int test_not_private_ptr(private char* p) {
+  return !p;
+}
+
+// CHECK-LABEL: test_not_local_ptr
+// SPIR64: %[[lnot:.*]] = icmp eq ptr addrspace(3) %p, addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
+// AMDGCN: %[[lnot:.*]] = icmp eq ptr addrspace(3) %p, addrspacecast (ptr null to ptr addrspace(3))
+// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
+// CHECK: ret i32 %[[lnot_ext]]
+int test_not_local_ptr(local char* p) {
+  return !p;
+}
+
+
+// CHECK-LABEL: test_and_ptr
+// SPIR64: %[[tobool:.*]] = icmp ne ptr %p1, addrspacecast (ptr addrspace(4) null to ptr)
+// SPIR64: %[[tobool1:.*]] = icmp ne ptr addrspace(3) %p2, addrspacecast (ptr addrspace(4) null to ptr addrspace(3))
+// AMDGCN: %[[tobool:.*]] = icmp ne ptr addrspace(5) %p1, addrspacecast (ptr null to ptr addrspace(5))
+// AMDGCN: %[[tobool1:.*]] = icmp ne ptr addrspace(3) %p2, addrspacecast (ptr null to ptr addrspace(3))
+// CHECK: %[[res:.*]] = select i1 %[[tobool]], i1 %[[tobool1]], i1 false
+// CHECK: %[[land_ext:.*]] = zext i1 %[[res]] to i32
+// CHECK: ret i32 %[[land_ext]]
+int test_and_ptr(private char* p1, local char* p2) {
+  return p1 && p2;
+}
+
+// Test folding of null pointer in function scope.
+// CHECK-NOOPT-LABEL: test_fold_private
+// SPIR64-NOOPT:  call{{.*}} void @test_fold_callee
+// SPIR64-NOOPT:  store ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), ptr %glob{{.*}}, align 8
+// SPIR64-NOOPT:  %{{.*}} = sub i64 %{{.*}}, ptrtoint (ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)) to i64)
+// AMDGCN-NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
+// AMDGCN-NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
+// SPIR64-NOOPT:  call{{.*}} void @test_fold_callee
+// SPIR64-NOOPT:  %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i32) to i64
+// AMDGCN-NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i32) to i64
+// CHECK-NOOPT: %{{.*}} = add nsw i64 %{{.*}}, %[[SEXT]]
+// CHECK-NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
+void test_fold_callee(void);
+void test_fold_private(void) {
+  global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
+  long x = glob - (global int*)(generic char*)0;
+  x = x + (int)(test_fold_callee(), (private int*)(generic char*)(global short*)0);
+  x = x - (int)((private int*)0 == (private int*)(generic char*)0);
+}
+
+// CHECK-NOOPT-LABEL: test_fold_local
+// CHECK-NOOPT:  call{{.*}} void @test_fold_callee
+// SPIR64-NOOPT: store ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)), ptr %glob{{.*}}, align 8
+// SPIR64-NOOPT: %{{.*}} = sub i64 %{{.*}}, ptrtoint (ptr addrspace(1) addrspacecast (ptr addrspace(4) null to ptr addrspace(1)) to i64)
+// AMDGCN-NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
+// AMDGCN-NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
+// CHECK-NOOPT:  call{{.*}} void @test_fold_callee
+// SPIR64-NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)) to i32) to i64
+// AMDGCN-NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i32) to i64
+// CHECK-NOOPT: %{{.*}} = add nsw i64 %{{.*}}, %[[SEXT]]
+// CHECK-NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
+void test_fold_local(void) {
+  global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
+  long x = glob - (global int*)(generic char*)0;
+  x = x + (int)(test_fold_callee(), (local int*)(generic char*)(global short*)0);
+  x = x - (int)((local int*)0 == (local int*)(generic char*)0);
+}
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 3717c44..f1660b1 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -304,13 +304,6 @@
 // AMX-COMPLEX: "-target-feature" "+amx-complex"
 // NO-AMX-COMPLEX: "-target-feature" "-amx-complex"
 
-// RUN: %clang --target=x86_64-unknown-linux-gnu -mamx-transpose %s \
-// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AMX-TRANSPOSE %s
-// RUN: %clang --target=x86_64-unknown-linux-gnu -mno-amx-transpose %s \
-// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AMX-TRANSPOSE %s
-// AMX-TRANSPOSE: "-target-feature" "+amx-transpose"
-// NO-AMX-TRANSPOSE: "-target-feature" "-amx-transpose"
-
 // RUN: %clang --target=x86_64-unknown-linux-gnu -mamx-avx512 %s \
 // RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AMX-AVX512 %s
 // RUN: %clang --target=x86_64-unknown-linux-gnu -mno-amx-avx512 %s \
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index cdb4632..cf2cd4a 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -1841,7 +1841,6 @@
 // CHECK_DMR_M32: #define __AMX_MOVRS__ 1
 // CHECK_DMR_M32: #define __AMX_TF32__ 1
 // CHECK_GNR_M32: #define __AMX_TILE__ 1
-// CHECK_DMR_M32: #define __AMX_TRANSPOSE__ 1
 // CHECK_DMR_M32: #define __AVX10_2_512__ 1
 // CHECK_DMR_M32: #define __AVX10_2__ 1
 // CHECK_GNR_M32: #define __AVX2__ 1
@@ -1947,7 +1946,6 @@
 // CHECK_DMR_M64: #define __AMX_MOVRS__ 1
 // CHECK_DMR_M64: #define __AMX_TF32__ 1
 // CHECK_GNR_M64: #define __AMX_TILE__ 1
-// CHECK_DMR_M64: #define __AMX_TRANSPOSE__ 1
 // CHECK_DMR_M64: #define __AVX10_2_512__ 1
 // CHECK_DMR_M64: #define __AVX10_2__ 1
 // CHECK_GNR_M64: #define __AVX2__ 1
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 5f17641..78f8b19 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -526,18 +526,6 @@
 
 // NO-AMX-COMPLEX-NOT: #define __AMX_COMPLEX__ 1
 
-// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-transpose -x c \
-// RUN: -E -dM -o - %s | FileCheck  -check-prefix=AMX-TRANSPOSE %s
-
-// AMX-TRANSPOSE: #define __AMX_TRANSPOSE__ 1
-
-// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-transpose -x c \
-// RUN: -E -dM -o - %s | FileCheck  -check-prefix=NO-AMX-TRANSPOSE %s
-// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-transpose -mno-amx-tile \
-// RUN: -x c -E -dM -o - %s | FileCheck  -check-prefix=NO-AMX-TRANSPOSE %s
-
-// NO-AMX-TRANSPOSE-NOT: #define __AMX_TRANSPOSE__ 1
-
 // RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-avx512 -x c \
 // RUN: -E -dM -o - %s | FileCheck  -check-prefix=AMX-AVX512 %s
 
diff --git a/clang/test/SemaCXX/attr-callback-broken.cpp b/clang/test/SemaCXX/attr-callback-broken.cpp
index a5469b2..53b331a 100644
--- a/clang/test/SemaCXX/attr-callback-broken.cpp
+++ b/clang/test/SemaCXX/attr-callback-broken.cpp
@@ -1,7 +1,12 @@
-// RUN: %clang_cc1 %s -verify -fsyntax-only
+// RUN: %clang_cc1 %s -std=c++23 -verify -fsyntax-only
 
 class C_in_class {
 #define HAS_THIS
 #include "../Sema/attr-callback-broken.c"
 #undef HAS_THIS
 };
+
+class ExplicitParameterObject {
+  __attribute__((callback(2, 0))) void explicit_this_idx(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*));           // expected-error {{'callback' argument at position 2 references unavailable implicit 'this'}}
+  __attribute__((callback(2, this))) void explicit_this_identifier(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*)); // expected-error {{'callback' argument at position 2 references unavailable implicit 'this'}}
+};
diff --git a/clang/test/SemaCXX/attr-callback.cpp b/clang/test/SemaCXX/attr-callback.cpp
index ee02f7d..ff5a241 100644
--- a/clang/test/SemaCXX/attr-callback.cpp
+++ b/clang/test/SemaCXX/attr-callback.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -verify -fsyntax-only
+// RUN: %clang_cc1 %s -std=c++23 -verify -fsyntax-only
 
 // expected-no-diagnostics
 
@@ -6,6 +6,11 @@ class C_in_class {
 #include "../Sema/attr-callback.c"
 };
 
+class ExplicitParameterObject {
+  __attribute__((callback(2, 1))) void explicit_this_idx(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*));
+  __attribute__((callback(2, self))) void explicit_this_identifier(this ExplicitParameterObject* self, void (*callback)(ExplicitParameterObject*));
+};
+
 struct Base {
 
   void no_args_1(void (*callback)(void));
diff --git a/clang/test/SemaCXX/attr-format.cpp b/clang/test/SemaCXX/attr-format.cpp
index adc05fc..c0aeb5d 100644
--- a/clang/test/SemaCXX/attr-format.cpp
+++ b/clang/test/SemaCXX/attr-format.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -Wformat-nonliteral -verify %s
+// RUN: %clang_cc1 -fsyntax-only -std=c++23 -Wformat-nonliteral -verify %s
 #include <stdarg.h>
 
 int printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
@@ -11,6 +11,10 @@ struct S {
   // the format argument is argument 2 here.
   void g(const char*, ...) __attribute__((format(printf, 2, 3)));
   const char* g2(const char*) __attribute__((format_arg(2)));
+  // From C++23 'this' can also be specified explicitly.
+  void g3(this S&, const char *, ...) __attribute__((format(printf, 2, 3)));
+  void g4(this const char* s, ...) __attribute__((format(printf, 1, 2)));
+  consteval operator const char*() const { return "%f"; } // #g4_fmt_string
 
   void h(const char*, ...) __attribute__((format(printf, 1, 4))); // \
       expected-error{{implicit this argument as the format string}}
@@ -18,10 +22,17 @@ struct S {
       expected-error{{out of bounds}}
   const char* h3(const char*) __attribute__((format_arg(1))); // \
       expected-error{{invalid for the implicit this argument}}
+  void h4(this S&, const char *, ...) __attribute__((format(printf, 1, 3))); // \
+      expected-error {{format argument not a string type}}
 
   void operator() (const char*, ...) __attribute__((format(printf, 2, 3)));
 };
 
+void s() {
+  S().g4(4); // expected-warning {{format specifies type 'double' but the argument has type 'int'}}
+             // expected-note@#g4_fmt_string {{format string is defined here}}
+}
+
 // PR5521
 struct A { void a(const char*,...) __attribute((format(printf,2,3))); };
 void b(A x) {
diff --git a/clang/test/SemaCXX/attr-lifetime-capture-by.cpp b/clang/test/SemaCXX/attr-lifetime-capture-by.cpp
index 70a5fe5..8606592 100644
--- a/clang/test/SemaCXX/attr-lifetime-capture-by.cpp
+++ b/clang/test/SemaCXX/attr-lifetime-capture-by.cpp
@@ -44,4 +44,7 @@ struct T {
   {
     s.captureInt(x);
   }
+
+  void explicit_this1(this T& self, const int &x [[clang::lifetime_capture_by(self)]]);
+  void explicit_this2(this T& self, const int &x [[clang::lifetime_capture_by(this)]]); // expected-error {{argument references unavailable implicit 'this'}}
 };
diff --git a/clang/test/SemaCXX/attr-nonnull.cpp b/clang/test/SemaCXX/attr-nonnull.cpp
index 6f9119b..0fba6b5 100644
--- a/clang/test/SemaCXX/attr-nonnull.cpp
+++ b/clang/test/SemaCXX/attr-nonnull.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
 struct S {
   S(const char *) __attribute__((nonnull(2)));
 
@@ -11,6 +11,13 @@ struct S {
 
   void h(const char*) __attribute__((nonnull(1))); // \
       expected-error{{invalid for the implicit this argument}}
+
+  void i(this S* self, const char*) __attribute__((nonnull(1)));
+
+  void j(this S* self, const char*) __attribute__((nonnull(2)));
+
+  void k(this S* self, const char*) __attribute__((nonnull(3))); // \
+      expected-error{{'nonnull' attribute parameter 1 is out of bounds}}
 };
 
 void test() {
diff --git a/clang/tools/scan-view/share/ScanView.py b/clang/tools/scan-view/share/ScanView.py
index a89bf3f..9c11013 100644
--- a/clang/tools/scan-view/share/ScanView.py
+++ b/clang/tools/scan-view/share/ScanView.py
@@ -1,40 +1,19 @@
-from __future__ import print_function
-
-try:
-    from http.server import HTTPServer, SimpleHTTPRequestHandler
-except ImportError:
-    from BaseHTTPServer import HTTPServer
-    from SimpleHTTPServer import SimpleHTTPRequestHandler
+from http.server import HTTPServer, SimpleHTTPRequestHandler
 import os
 import sys
-
-try:
-    from urlparse import urlparse
-    from urllib import unquote
-except ImportError:
-    from urllib.parse import urlparse, unquote
-
+from urllib.parse import urlparse, unquote
 import posixpath
-
-if sys.version_info.major >= 3:
-    from io import StringIO, BytesIO
-else:
-    from io import BytesIO, BytesIO as StringIO
-
+from io import StringIO, BytesIO
 import re
 import shutil
 import threading
 import time
 import socket
 import itertools
+import configparser
 
 import Reporter
 
-try:
-    import configparser
-except ImportError:
-    import ConfigParser as configparser
-
 ###
 # Various patterns matched or replaced by server.
 
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index adf5e74..a1fcbb0 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -527,6 +527,31 @@ TEST(RangeSelectorTest, NameOpDeclRefError) {
           AllOf(HasSubstr(Ref), HasSubstr("requires property 'identifier'")))));
 }
 
+TEST(RangeSelectorTest, NameOpDeclInMacroArg) {
+  StringRef Code = R"cc(
+  #define MACRO(name) int name;
+  MACRO(x)
+  )cc";
+  const char *ID = "id";
+  TestMatch Match = matchCode(Code, varDecl().bind(ID));
+  EXPECT_THAT_EXPECTED(select(name(ID), Match), HasValue("x"));
+}
+
+TEST(RangeSelectorTest, NameOpDeclInMacroBodyError) {
+  StringRef Code = R"cc(
+  #define MACRO int x;
+  MACRO
+  )cc";
+  const char *ID = "id";
+  TestMatch Match = matchCode(Code, varDecl().bind(ID));
+  EXPECT_THAT_EXPECTED(
+      name(ID)(Match.Result),
+      Failed<StringError>(testing::Property(
+          &StringError::getMessage,
+          AllOf(HasSubstr("range selected by name(node id="),
+                HasSubstr("' is different from decl name 'x'")))));
+}
+
 TEST(RangeSelectorTest, CallArgsOp) {
   const StringRef Code = R"cc(
     struct C {
diff --git a/clang/utils/CmpDriver b/clang/utils/CmpDriver
index 12ce7a3..0732baa 100755
--- a/clang/utils/CmpDriver
+++ b/clang/utils/CmpDriver
@@ -5,6 +5,7 @@ A simple utility that compares tool invocations and exit codes issued by
 compiler drivers that support -### (e.g. gcc and clang).
 """
 
+from itertools import zip_longest
 import subprocess
 
 def splitArgs(s):
@@ -22,7 +23,7 @@ def splitArgs(s):
         elif inQuote:
             if c == '\\':
                 current += c
-                current += it.next()
+                current += next(it)
             else:
                 current += c
         elif not c.isspace():
@@ -135,77 +136,77 @@ def main():
 
     # Compare stdout.
     if infoA.stdout != infoB.stdout:
-        print '-- STDOUT DIFFERS -'
-        print 'A OUTPUT: ',infoA.stdout
-        print 'B OUTPUT: ',infoB.stdout
-        print
+        print('-- STDOUT DIFFERS -')
+        print('A OUTPUT: ',infoA.stdout)
+        print('B OUTPUT: ',infoB.stdout)
+        print()
 
         diff = ZipperDiff(infoA.stdout.split('\n'),
                           infoB.stdout.split('\n'))
         for i,(aElt,bElt) in enumerate(diff.getDiffs()):
             if aElt is None:
-                print 'A missing: %s' % bElt
+                print('A missing: %s' % bElt)
             elif bElt is None:
-                print 'B missing: %s' % aElt
+                print('B missing: %s' % aElt)
             else:
-                print 'mismatch: A: %s' % aElt
-                print '          B: %s' % bElt
+                print('mismatch: A: %s' % aElt)
+                print('          B: %s' % bElt)
 
         differ = True
 
     # Compare stderr.
     if infoA.stderr != infoB.stderr:
-        print '-- STDERR DIFFERS -'
-        print 'A STDERR: ',infoA.stderr
-        print 'B STDERR: ',infoB.stderr
-        print
+        print('-- STDERR DIFFERS -')
+        print('A STDERR: ',infoA.stderr)
+        print('B STDERR: ',infoB.stderr)
+        print()
 
         diff = ZipperDiff(infoA.stderr.split('\n'),
                           infoB.stderr.split('\n'))
         for i,(aElt,bElt) in enumerate(diff.getDiffs()):
             if aElt is None:
-                print 'A missing: %s' % bElt
+                print('A missing: %s' % bElt)
             elif bElt is None:
-                print 'B missing: %s' % aElt
+                print('B missing: %s' % aElt)
             else:
-                print 'mismatch: A: %s' % aElt
-                print '          B: %s' % bElt
+                print('mismatch: A: %s' % aElt)
+                print('          B: %s' % bElt)
 
         differ = True
 
     # Compare commands.
-    for i,(a,b) in enumerate(map(None, infoA.commands, infoB.commands)):
+    for i,(a,b) in enumerate(zip_longest(infoA.commands, infoB.commands, fillvalue=None)):
         if a is None:
-            print 'A MISSING:',' '.join(b)
+            print('A MISSING:',' '.join(b))
             differ = True
             continue
         elif b is None:
-            print 'B MISSING:',' '.join(a)
+            print('B MISSING:',' '.join(a))
             differ = True
             continue
 
         diff = DriverZipperDiff(a,b)
         diffs = list(diff.getDiffs())
         if diffs:
-            print '-- COMMAND %d DIFFERS -' % i
-            print 'A COMMAND:',' '.join(a)
-            print 'B COMMAND:',' '.join(b)
-            print
+            print('-- COMMAND %d DIFFERS -' % i)
+            print('A COMMAND:',' '.join(a))
+            print('B COMMAND:',' '.join(b))
+            print()
             for i,(aElt,bElt) in enumerate(diffs):
                 if aElt is None:
-                    print 'A missing: %s' % bElt
+                    print('A missing: %s' % bElt)
                 elif bElt is None:
-                    print 'B missing: %s' % aElt
+                    print('B missing: %s' % aElt)
                 else:
-                    print 'mismatch: A: %s' % aElt
-                    print '          B: %s' % bElt
+                    print('mismatch: A: %s' % aElt)
+                    print('          B: %s' % bElt)
             differ = True
     
     # Compare result codes.
     if infoA.exitCode != infoB.exitCode:
-        print '-- EXIT CODES DIFFER -'
-        print 'A: ',infoA.exitCode
-        print 'B: ',infoB.exitCode
+        print('-- EXIT CODES DIFFER -')
+        print('A: ',infoA.exitCode)
+        print('B: ',infoB.exitCode)
         differ = True
 
     if differ:
diff --git a/clang/utils/check_cfc/check_cfc.py b/clang/utils/check_cfc/check_cfc.py
index 8d42ec5..7658f6c 100755
--- a/clang/utils/check_cfc/check_cfc.py
+++ b/clang/utils/check_cfc/check_cfc.py
@@ -56,11 +56,7 @@ import shutil
 import subprocess
 import sys
 import tempfile
-
-try:
-    import configparser
-except ImportError:
-    import ConfigParser as configparser
+import configparser
 import io
 
 import obj_diff